def test_mixture_of_mvn(self):
    mu1 = np.asarray([0.0, 1.0])
    cov1 = np.diag([1.5, 2.5])
    mu2 = np.asarray([1.0, 0.0])
    cov2 = np.diag([2.5, 3.5])
    obs = np.asarray([[0.5, 0.5], mu1, mu2])
    with Model() as model:
        w = Dirichlet("w", floatX(np.ones(2)), transform=None, shape=(2,))
        mvncomp1 = MvNormal.dist(mu=mu1, cov=cov1)
        mvncomp2 = MvNormal.dist(mu=mu2, cov=cov2)
        y = Mixture("x_obs", w, [mvncomp1, mvncomp2], observed=obs)

    # check logp of each component
    complogp_st = np.vstack((
        st.multivariate_normal.logpdf(obs, mu1, cov1),
        st.multivariate_normal.logpdf(obs, mu2, cov2),
    )).T
    complogp = y.distribution._comp_logp(theano.shared(obs)).eval()
    assert_allclose(complogp, complogp_st)

    # check logp of mixture
    testpoint = model.test_point
    mixlogp_st = logsumexp(np.log(testpoint["w"]) + complogp_st,
                           axis=-1, keepdims=False)
    assert_allclose(y.logp_elemwise(testpoint), mixlogp_st)

    # check logp of model
    priorlogp = st.dirichlet.logpdf(x=testpoint["w"], alpha=np.ones(2))
    assert_allclose(model.logp(testpoint), mixlogp_st.sum() + priorlogp)
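# For reference, the identity the assertions above rely on is the standard
# mixture log-density, log p(x) = logsumexp_k(log w_k + log p_k(x)).
# A minimal standalone NumPy/SciPy sketch of the same check, with hypothetical
# toy numbers (not taken from the test fixtures):
import numpy as np
from scipy import stats as st
from scipy.special import logsumexp

# toy two-component bivariate mixture (hypothetical weights and parameters)
w = np.array([0.3, 0.7])
mu1, cov1 = np.zeros(2), np.diag([1.5, 2.5])
mu2, cov2 = np.ones(2), np.diag([2.5, 3.5])
x = np.array([0.5, 0.5])

# log p(x) = logsumexp_k( log w_k + log p_k(x) )
comp_logp = np.array([
    st.multivariate_normal.logpdf(x, mu1, cov1),
    st.multivariate_normal.logpdf(x, mu2, cov2),
])
mix_logp = logsumexp(np.log(w) + comp_logp)

# same quantity computed naively in probability space
assert np.isclose(mix_logp, np.log(w @ np.exp(comp_logp)))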
def test_mixture_list_of_normals(self):
    with Model() as model:
        w = Dirichlet("w", floatX(np.ones_like(self.norm_w)),
                      shape=self.norm_w.size)
        mu = Normal("mu", 0.0, 10.0, shape=self.norm_w.size)
        tau = Gamma("tau", 1.0, 1.0, shape=self.norm_w.size)
        Mixture(
            "x_obs",
            w,
            [Normal.dist(mu[0], tau=tau[0]), Normal.dist(mu[1], tau=tau[1])],
            observed=self.norm_x,
        )
        step = Metropolis()
        trace = sample(5000, step, random_seed=self.random_seed,
                       progressbar=False, chains=1)

    assert_allclose(np.sort(trace["w"].mean(axis=0)), np.sort(self.norm_w),
                    rtol=0.1, atol=0.1)
    assert_allclose(np.sort(trace["mu"].mean(axis=0)), np.sort(self.norm_mu),
                    rtol=0.1, atol=0.1)
def test_normal_mixture(self):
    with Model() as model:
        w = Dirichlet('w', np.ones_like(self.norm_w))
        mu = Normal('mu', 0., 10., shape=self.norm_w.size)
        tau = Gamma('tau', 1., 1., shape=self.norm_w.size)
        x_obs = NormalMixture('x_obs', w, mu, tau=tau, observed=self.norm_x)
        step = Metropolis()
        trace = sample(5000, step, random_seed=self.random_seed,
                       progressbar=False)

    assert_allclose(np.sort(trace['w'].mean(axis=0)), np.sort(self.norm_w),
                    rtol=0.1, atol=0.1)
    assert_allclose(np.sort(trace['mu'].mean(axis=0)), np.sort(self.norm_mu),
                    rtol=0.1, atol=0.1)
def test_mixture_list_of_normals(self):
    with Model() as model:
        w = Dirichlet('w', floatX(np.ones_like(self.norm_w)))
        mu = Normal('mu', 0., 10., shape=self.norm_w.size)
        tau = Gamma('tau', 1., 1., shape=self.norm_w.size)
        Mixture('x_obs', w,
                [Normal.dist(mu[0], tau=tau[0]), Normal.dist(mu[1], tau=tau[1])],
                observed=self.norm_x)
        step = Metropolis()
        trace = sample(5000, step, random_seed=self.random_seed,
                       progressbar=False, chains=1)

    assert_allclose([np.sort(trace['w'].mean(axis=0)),
                     np.sort(trace['mu'].mean(axis=0))],
                    [np.sort(self.norm_w), np.sort(self.norm_mu)],
                    rtol=0.1, atol=0.1)
    assert_allclose(np.sort(trace['mu'].mean(axis=0)), np.sort(self.norm_mu),
                    rtol=0.1, atol=0.1)
def test_mixture_list_of_poissons(self):
    with Model() as model:
        w = Dirichlet('w', floatX(np.ones_like(self.pois_w)),
                      shape=self.pois_w.shape)
        mu = Gamma('mu', 1., 1., shape=self.pois_w.size)
        Mixture('x_obs', w, [Poisson.dist(mu[0]), Poisson.dist(mu[1])],
                observed=self.pois_x)
        step = Metropolis()
        trace = sample(5000, step, random_seed=self.random_seed,
                       progressbar=False, chains=1)

    assert_allclose(np.sort(trace['w'].mean(axis=0)), np.sort(self.pois_w),
                    rtol=0.1, atol=0.1)
    assert_allclose(np.sort(trace['mu'].mean(axis=0)), np.sort(self.pois_mu),
                    rtol=0.1, atol=0.1)
def test_poisson_mixture(self):
    with Model() as model:
        w = Dirichlet("w", floatX(np.ones_like(self.pois_w)),
                      shape=self.pois_w.shape)
        mu = Gamma("mu", 1.0, 1.0, shape=self.pois_w.size)
        Mixture("x_obs", w, Poisson.dist(mu), observed=self.pois_x)
        step = Metropolis()
        trace = sample(5000, step, random_seed=self.random_seed,
                       progressbar=False, chains=1)

    assert_allclose(np.sort(trace["w"].mean(axis=0)), np.sort(self.pois_w),
                    rtol=0.1, atol=0.1)
    assert_allclose(np.sort(trace["mu"].mean(axis=0)), np.sort(self.pois_mu),
                    rtol=0.1, atol=0.1)
def test_normal_mixture_nd(self):
    nd, ncomp = 3, 5

    with Model() as model0:
        mus = Normal('mus', shape=(nd, ncomp))
        taus = Gamma('taus', alpha=1, beta=1, shape=(nd, ncomp))
        ws = Dirichlet('ws', np.ones(ncomp))
        mixture0 = NormalMixture('m', w=ws, mu=mus, tau=taus, shape=nd)

    with Model() as model1:
        mus = Normal('mus', shape=(nd, ncomp))
        taus = Gamma('taus', alpha=1, beta=1, shape=(nd, ncomp))
        ws = Dirichlet('ws', np.ones(ncomp))
        comp_dist = [Normal.dist(mu=mus[:, i], tau=taus[:, i])
                     for i in range(ncomp)]
        mixture1 = Mixture('m', w=ws, comp_dists=comp_dist, shape=nd)

    testpoint = model0.test_point
    testpoint['mus'] = np.random.randn(nd, ncomp)
    assert_allclose(model0.logp(testpoint), model1.logp(testpoint))
    assert_allclose(mixture0.logp(testpoint), mixture1.logp(testpoint))
def run_mv_model(data, K=3, n_feats=2, mus=None, mc_samples=10000, jobs=1):
    with pm.Model() as model:
        n_samples = len(data)
        tau = pm.Deterministic('tau', pm.floatX(tt.eye(n_feats) * 10))
        mus = 0. if mus is None else mus
        mus = MvNormal('mus', mu=mus, tau=tau, shape=(K, n_feats))
        pi = Dirichlet('pi', a=pm.floatX([1. for _ in range(K)]), shape=K)
        category = pm.Categorical('category', p=pi, shape=n_samples)
        xs = pm.MvNormal('x', mu=mus[category], tau=tt.eye(n_feats),
                         observed=data)

    with model:
        step2 = pm.ElemwiseCategorical(vars=[category], values=range(K))
        trace = sample(mc_samples, step2, n_jobs=jobs)

    pm.traceplot(trace, varnames=['mus', 'pi', 'tau'])
    plt.title('mv model')
    mod = stats.mode(trace['category'][int(mc_samples * 0.75):])
    return model, mod, trace
def run_normal_mv_model(data, K=3, mus=None, mc_samples=10000, jobs=1):
    with pm.Model() as model:
        n_samples, n_feats = data.shape
        # print(n_samples, n_feats)
        packed_L = pm.LKJCholeskyCov('packed_L', n=n_feats, eta=2.,
                                     sd_dist=pm.HalfCauchy.dist(2.5))
        L = pm.expand_packed_triangular(n_feats, packed_L)
        sigma = pm.Deterministic('Sigma', L.dot(L.T))

        mus = 0. if mus is None else mus
        # mus = pm.Normal('mus', mu=[[10, 10], [55, 55], [105, 105], [155, 155], [205, 205]],
        #                 sd=10, shape=(K, n_feats))
        mus = pm.Normal('mus', mu=mus, sd=10., shape=(K, n_feats),
                        testval=data.mean(axis=0))

        pi = Dirichlet('pi', a=pm.floatX([1. for _ in range(K)]), shape=K)
        # TODO one pi per voxel
        category = pm.Categorical('category', p=pi, shape=n_samples)

        xs = pm.MvNormal('x', mu=mus[category], chol=L, observed=data)

    with model:
        step2 = pm.ElemwiseCategorical(vars=[category], values=range(K))
        trace = sample(mc_samples, step2, n_jobs=jobs)

    pm.traceplot(trace, varnames=['mus', 'pi', 'Sigma'])
    plt.title('normal mv model')
    mod = stats.mode(trace['category'][int(mc_samples * 0.75):])
    # if chains > 1:
    #     print(max(np.max(gr_stats) for gr_stats in pm.gelman_rubin(trace).values()))
    return model, mod, trace
def f(mask, aoi_id, plot=False):
    '''
    * run parallel process
    '''
    # turn off pymc3 logging
    getLogger("pymc3").setLevel(ERROR)

    # get transform object for the dataset (nw corner & resolution)
    transform = from_origin(mask['bounds'][0], mask['bounds'][3],
                            mask['resolution'], mask['resolution'])

    # check that output directory is there
    if not exists("./out/"):
        makedirs("./out/")

    # seed data and uncertainty arrays for the study area and build dictionary to control outputs
    c_data = zeros((
        ceil((mask['bounds'][3] - mask['bounds'][1]) / mask['resolution']),
        ceil((mask['bounds'][2] - mask['bounds'][0]) / mask['resolution'])
    ))
    outputs = {
        'catholic': {
            'path': f'./out/{aoi_id}_catholic.tif',
            'mean': c_data,
            'low': c_data.copy(),
            'high': c_data.copy()
        },
        'protestant': {
            'path': f'./out/{aoi_id}_protestant.tif',
            'mean': c_data.copy(),
            'low': c_data.copy(),
            'high': c_data.copy()
        },
        'mixed': {
            'path': f'./out/{aoi_id}_mixed.tif',
            'mean': c_data.copy(),
            'low': c_data.copy(),
            'high': c_data.copy()
        }
    }

    # extract list of group names
    groups = array(list(outputs.keys()))

    # use try-finally so if it fails we can see where it got up to
    # try:
    print(f"AOI Dimensions: {c_data.shape[1]}x{c_data.shape[0]}px")

    # loop through rows and columns in the dataset
    for row in range(c_data.shape[0]):
        for col in range(c_data.shape[1]):
            print(
                f"\t...{row * c_data.shape[1] + col} of {c_data.shape[0] * c_data.shape[1]} "
                f"({(row * c_data.shape[1] + col) / (c_data.shape[0] * c_data.shape[1]) * 100:.2f}%)"
            )

            # get coordinates for the point
            point = Point(array2Coords(transform, row, col))

            '''
            calculate hyperparameters (priors)
            '''
            # get the census data for the census Small Area that contains the point
            possible_matches = mask['census'].iloc[list(
                mask['census'].sindex.intersection(point.bounds))]
            district = possible_matches.loc[possible_matches.contains(point)][[
                'pcCatholic', 'pcProtesta', 'pc_Other', 'pc_None'
            ]]

            # make sure that there was a match at all!
            if len(district.index) > 0:
                # compute proportions for the three groups
                # replace zeros with 1s, as 0's are not allowed in the hyperparameters
                # (they give a "Bad initial energy" error)
                alphas = maximum(
                    ones(3),
                    array([
                        int(round(district['pcCatholic'].iloc[0])),
                        int(round(district['pcProtesta'].iloc[0])),
                        int(round(district['pc_Other'].iloc[0] +
                                  district['pc_None'].iloc[0]))
                    ]))
            else:
                # if no matches, have equal belief for each group
                alphas = array([1, 1, 1])

            '''
            calculate observations
            '''
            # init lists for observations
            c = []
            n = []

            # construct the radius for analysis
            polygon = point.buffer(mask['radius'])

            # loop through each dataset
            for i, gdf in mask['datasets'].items():

                # check that there is data available (this is if no data has been
                # passed in the mask as the clip polygon does not intersect any)
                if len(gdf.index) > 0:

                    # get data points within and get IDW2 multiplier
                    possible_matches = gdf.iloc[list(
                        gdf.sindex.intersection(polygon.bounds))]
                    observations = possible_matches.loc[
                        possible_matches.within(polygon)]
                    observations['idw2'] = (
                        1 - observations.geometry.distance(point) / mask['radius'])**2

                    # check that there is data available (this is if data has been
                    # passed but the buffer polygon does not intersect it)
                    if len(observations) > 0:

                        # get weighted group counts for the current dataset
                        if i == 'mapme':
                            catholics, protestants, mixed = getMapmeGroups(observations)
                        elif i == 'gps':
                            catholics, protestants, mixed = getGpsGroups(observations)
                        elif i == 'survey':
                            catholics, protestants, mixed = getSurveyGroups(observations)

                        # index and int the scores for each dataset
                        sums = [catholics, protestants, mixed]

                        # catch error caused by no probabilities
                        if sum(sums) > 0:
                            print(sums)

                            # process into correct format
                            sums = [int(round(i / sum(sums) * 100)) for i in sums]

                            # append to observations list
                            c.append(sums)
                            n.append(sum(sums))

                        # TODO: DO I WANT ALL THESE 0'S OR ARE THEY GOING TO CAUSE PROBLEMS?
                        else:
                            # if no matches, just append some empty data
                            c.append([0, 0, 0])
                            n.append(0)
                    else:
                        # if no matches, just append some empty data
                        c.append([0, 0, 0])
                        n.append(0)
                else:
                    # if no matches, just append some empty data
                    c.append([0, 0, 0])
                    n.append(0)

            # convert observations to np arrays
            c = array(c)
            n = array(n)
            # print(alphas, c, n)
            # print()

            '''
            run model
            '''
            # start making MCMC model
            with Model() as model:
                # TODO: LOOK INTO TESTVALS FOR PARAMETERS
                # https://nbviewer.jupyter.org/github/CamDavidsonPilon/Probabilistic-Programming-and-Bayesian-Methods-for-Hackers/blob/master/Chapter3_MCMC/Ch3_IntroMCMC_PyMC3.ipynb#Intelligent-starting-values

                # parameters of the Multinomial are from a Dirichlet
                parameters = Dirichlet('parameters', a=alphas, shape=3)

                # observed data is from a Multinomial distribution
                observed_data = Multinomial('observed_data', n=n, p=parameters,
                                            shape=3, observed=c)

            with model:
                # estimate the Maximum a Posteriori
                # start = find_MAP()  # don't use this - it prevents convergence!

                # sample from the posterior (NUTS is default so is not explicitly stated)
                trace = sample(
                    # start=start,  # start at the MAP to increase chance of convergence -- DON'T DO THIS!
                    draws=1000,                  # number of sample draws
                    chains=4,                    # number of chains in which the above are drawn (match cores)
                    cores=1,                     # max permitted by library
                    tune=500,                    # how many will be discarded (>=50% of draws)
                    discard_tuned_samples=True,  # discard the tuning samples
                    progressbar=False,           # avoid unnecessarily filling up the output file
                    target_accept=0.9            # up from 0.8 to avoid false positives: https://eigenfoo.xyz/bayesian-modelling-cookbook/#fixing-divergences
                )

                if plot:
                    plot_trace(trace, show=True)

                # retrieve summary data
                results = summary(trace)
                results.index = groups

            # output the result to the datasets
            for k, v in outputs.items():
                v['mean'][row, col] = results.loc[k, 'mean']
                v['low'][row, col] = results.loc[k, 'hpd_3%']
                v['high'][row, col] = results.loc[k, 'hpd_97%']

    # if we get an error - print some debugging info
    # except Exception as e:
    #     print("\n--- EXCEPTION ---")
    #     print(e)
    #     print(row, col, point)
    #     if (sums):
    #         print(sums)
    #     else:
    #         print("sums not defined yet")
    #     print(c, n)
    #
    # # whatever happens, output the results to files
    # finally:

    # loop through outputs
    for g in outputs.values():

        # output dataset to raster (hardcoded crs as was causing error)
        with rio_open(g['path'], 'w', driver='GTiff',
                      height=g['mean'].shape[0], width=g['mean'].shape[1],
                      count=3, dtype='float64', crs="EPSG:29902",
                      transform=transform) as out:

            # add data and uncertainties as raster bands
            out.write(g['mean'], 1)
            out.write(g['low'], 2)
            out.write(g['high'], 3)
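# Since the per-pixel model above is a Dirichlet prior over a Multinomial
# likelihood, the posterior is available in closed form (Dirichlet-multinomial
# conjugacy), which gives a cheap sanity check on the MCMC posterior means.
# A minimal sketch with hypothetical counts in the same layout as the loop above:
import numpy as np

# hypothetical prior pseudo-counts and observed group counts
# (catholic, protestant, mixed); one row of c per dataset
alphas = np.array([60.0, 30.0, 10.0])
c = np.array([[70, 20, 10],
              [55, 35, 10]])

# conjugacy: posterior is Dirichlet(alpha + summed counts)
post_alpha = alphas + c.sum(axis=0)
post_mean = post_alpha / post_alpha.sum()

# these analytic means should be close to the 'mean' column of summary(trace)
print(post_mean)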
def run_normal_mv_model_mixture(data, K=3, mus=None, mc_samples=10000, jobs=1,
                                n_cols=10, n_rows=100, neigs=1):
    n_samples, n_feats = data.shape
    n_samples = n_cols * n_rows
    max_neigs = 4 * neigs * (neigs + 1)
    # print(max_neigs)
    to_fill = indxs_neigs(range(n_samples), n_cols=n_cols, n_rows=n_rows, n=neigs)
    inds = np.where(to_fill != -1)[0]
    to_fill = to_fill[to_fill != -1]
    aux = tt.ones(n_samples * max_neigs) * -69
    shp = (K, n_feats)
    mus_start = np.percentile(data, np.linspace(1, 100, K), axis=0)

    with pm.Model() as model:
        packed_L = pm.LKJCholeskyCov('packed_L', n=n_feats, eta=2.,
                                     sd_dist=pm.HalfCauchy.dist(2.5))
        L = pm.expand_packed_triangular(n_feats, packed_L)
        sigma = pm.Deterministic('Sigma', L.dot(L.T))

        mus = 0. if mus is None else mus
        sds = pm.HalfNormal('sds', sd=tt.ones(shp) * 100, shape=shp)
        mus = pm.Normal('mus', mu=tt.as_tensor_variable(mus_start), sd=sds,
                        shape=shp)
        pi = Dirichlet('pi', a=pm.floatX([1. for _ in range(K)]), shape=K)
        # TODO one pi per voxel
        # category = pm.Categorical('category', p=pi, shape=n_samples)
        mvs = [pm.MvNormal.dist(mu=mus[i], chol=L) for i in range(K)]
        # aux2 = tt.set_subtensor(aux[inds], category[to_fill])
        # prior = pm.Deterministic('prior', (tt.sum(tt.eq(
        #     aux2.reshape((n_samples, max_neigs)),
        #     category.reshape((n_samples, 1))), axis=1) + 1) / 1.0)
        pesos = pm.Dirichlet('pesos', a=np.ones((K,)))
        # obs = pm.Mixture('obs', w=pesos, comp_dists=mvs, observed=data)
        obs = my_mixture('obs', w=pesos, comp_dists=mvs, observed=data)

    with model:
        # step2 = pm.CategoricalGibbsMetropolis(vars=[category])
        trace = sample(mc_samples, n_jobs=jobs, tune=500)

    # 'mvs' is a list of component distributions, not a trace variable
    pm.traceplot(trace, varnames=['mus', 'pi', 'Sigma', 'pesos'])
    plt.title('normal mv model 40 cols')
    # logp_simple(mus, category, aux3)  # stray call: category and aux3 are undefined here
    mod = stats.mode(trace['category'][int(mc_samples * 0.75):])
    # if chains > 1:
    #     print(max(np.max(gr_stats) for gr_stats in pm.gelman_rubin(trace).values()))
    return model, mod, trace
def run_One_d_Model(data, K=3, mus=None, mc_samples=10000, jobs=1,
                    n_cols=10, n_rows=100, neigs=1):
    def logp_simple(mus, category, aux3):
        def logp_(value):
            spatial_factor = 2
            aux = tt.ones((n_samples,))
            logps = tt.zeros((n_samples))
            sumlogps = tt.zeros((K, n_samples))
            pi = tt.sum(tt.eq(aux3, (aux * category).reshape((n_samples, 1))),
                        axis=1) / 8.0
            # TODO: are logps and sumlogps always overwritten at every value?
            for i, label in enumerate(range(K)):
                pi_l = tt.sum(tt.eq(aux3, (aux * label).reshape((n_samples, 1))),
                              axis=1) / 8.0
                sumlogps = tt.set_subtensor(
                    sumlogps[i, :],
                    (mus[label].logp(value)) + (pi_l - 1) * spatial_factor)
            sumlogps = tt.sum(sumlogps, axis=0)

            for label in range(K):
                indx = tt.eq(category, tt.as_tensor_variable(label)).nonzero()
                logps = tt.set_subtensor(
                    logps[indx],
                    (mus[label].logp(value)[indx])
                    + (pi[indx] - 1) * spatial_factor - sumlogps[indx])
            return logps

        return logp_  # assumed: this return was missing; mirrors the DIY variant below

    n_samples, n_feats = data.shape
    n_samples = n_cols * n_rows
    max_neigs = 4 * neigs * (neigs + 1)
    # print(max_neigs)
    to_fill = indxs_neigs(range(n_samples), n_cols=n_cols, n_rows=n_rows, n=neigs)
    inds = np.where(to_fill != -1)[0]
    to_fill = to_fill[to_fill != -1]
    aux = tt.ones(n_samples * max_neigs) * -69
    shp = (K, n_feats)
    mus_start = np.percentile(data, np.linspace(1, 100, K), axis=0)
    alpha = 0.1 * np.ones((n_samples, K))

    with pm.Model() as model:
        mu = pm.Normal('mus', 100, mus_start, shape=K, testval=mus_start,
                       transform=Ordered())
        sd = pm.Uniform('sds', lower=0., upper=150., shape=K)
        # pi = Dirichlet('pi', a=alpha, shape=(n_samples, K))
        pi = Dirichlet('pi', a=alpha, shape=K)
        category = pm.Categorical('category', p=pi, shape=n_samples)
        shit_max = pm.Deterministic('shit_max', tt.max(category))
        shit_min = pm.Deterministic('shit_min', tt.min(category))
        # the original snippet breaks off at an empty pm.NormalMixture() call;
        # with the variables above, the likelihood would presumably be:
        x = pm.NormalMixture('x', w=pi, mu=mu, sd=sd, observed=data)
def run_normal_mv_model_mixture_DIY(data, K=3, mus=None, mc_samples=10000,
                                    jobs=1, n_cols=10, n_rows=100, neigs=1):
    def logp_simple(mus, category, aux3):
        def logp_(value):
            spatial_factor = 0.00
            aux = tt.ones((n_samples,))
            logps = tt.zeros((n_samples))
            sumlogps = tt.zeros((K, n_samples))
            pi = tt.sum(tt.eq(aux3, (aux * category).reshape((n_samples, 1))),
                        axis=1) / 8.0
            # TODO: are logps and sumlogps always overwritten at every value?
            for i, label in enumerate(range(K)):
                pi_l = tt.sum(tt.eq(aux3, (aux * label).reshape((n_samples, 1))),
                              axis=1) / 8.0
                sumlogps = tt.set_subtensor(
                    sumlogps[i, :],
                    (mus[label].logp(value)) + (pi_l - 1) * spatial_factor)
            sumlogps = tt.sum(sumlogps, axis=0)

            for label in range(K):
                indx = tt.eq(category, tt.as_tensor_variable(label)).nonzero()
                logps = tt.set_subtensor(
                    logps[indx],
                    (mus[label].logp(value)[indx])
                    + (pi[indx] - 1) * spatial_factor - sumlogps[indx])
            return logps

        return logp_

    # K = 3
    n_samples, n_feats = data.shape
    n_samples = n_cols * n_rows
    max_neigs = 4 * neigs * (neigs + 1)
    # print(max_neigs)
    to_fill = indxs_neigs(range(n_samples), n_cols=n_cols, n_rows=n_rows, n=neigs)
    inds = np.where(to_fill != -1)[0]
    to_fill = to_fill[to_fill != -1]
    aux = tt.ones(n_samples * max_neigs) * -69
    shp = (K, n_feats)
    mus_start = np.percentile(data, np.linspace(1, 100, K), axis=0)
    alpha = 0.1 * np.ones((n_samples, K))

    with pm.Model() as model:
        packed_L = [pm.LKJCholeskyCov('packed_L_%d' % i, n=n_feats, eta=2.,
                                      sd_dist=pm.HalfCauchy.dist(2.5))
                    for i in range(K)]
        L = [pm.expand_packed_triangular(n_feats, packed_L[i]) for i in range(K)]
        # sigma = pm.Deterministic('Sigma', L.dot(L.T))

        mus = 0. if mus is None else mus
        # sds = pm.Uniform('sds', lower=0., upper=150., shape=shp)
        mus = pm.Normal('mus', mu=100., sd=1, shape=shp)
        pi = Dirichlet('pi', a=alpha, shape=(n_samples, K))
        category = pm.Categorical('category', p=pi, shape=n_samples)
        shit_max = pm.Deterministic('shit_max', tt.max(category))
        shit_min = pm.Deterministic('shit_min', tt.min(category))
        # mvs = [MvNormal('mu_%d' % i, mu=mus[i], tau=pm.floatX(1. * np.eye(n_feats)),
        #                 shape=(n_feats,)) for i in range(K)]
        mvs = [pm.MvNormal.dist(mu=mus[i], chol=L[i]) for i in range(K)]

        aux2 = tt.set_subtensor(aux[inds], category[to_fill])
        xs = DensityDist('x',
                         logp_simple(mvs, category,
                                     aux2.reshape((n_samples, max_neigs))),
                         observed=data)

    with model:
        step2 = pm.ElemwiseCategorical(vars=[category], values=range(K))
        trace = sample(mc_samples, step=step2, tune=1000, chains=4)

    pm.traceplot(trace, varnames=['mus', 'sds'])
    plt.title('logp_sum_mo_alpha_700_tunes_spatial_2')
    mod = stats.mode(trace['category'][int(mc_samples * 0.75):])
    return model, mod, trace
obs_jumps = np.hstack([np.zeros((N, 1), dtype='int8'), obs_jumps])
obs_jumps = np.concatenate([obs_jumps[i, 0:T[i]] for i in range(N)])

# X is now (nObs, K)
X_start = np.concatenate([X_start[:, 0:T[i], i].T for i in range(N)])

# O is now (nObs, Dd)
# TODO: implement this with sparse matrices
O = np.concatenate([O[:, 0:T[i], i].T for i in range(N)])

# import pdb; pdb.set_trace()
model = Model()
with model:
    # Fails:
    # pi = Dirichlet('pi', a=as_tensor_variable([0.147026, 0.102571, 0.239819, 0.188710, 0.267137, 0.054738]),
    #                shape=M, testval=np.ones(M) / float(M))
    pi = Dirichlet('pi',
                   a=as_tensor_variable([0.147026, 0.102571, 0.239819,
                                         0.188710, 0.267137, 0.054738]),
                   shape=M)
    pi_min_potential = Potential('pi_min_potential',
                                 TT.switch(TT.min(pi) < .001, -np.inf, 0))
    Q = DiscreteObsMJP_unif_prior('Q', M=M, lower=0.0, upper=1.0, shape=(M, M))
    # S = DiscreteObsMJP('S', pi=pi, Q=Q, M=M, nObs=nObs, observed_jumps=obs_jumps,
    #                    T=T, shape=(nObs), testval=np.ones(nObs, dtype='int32'))
    S = DiscreteObsMJP('S', pi=pi, Q=Q, M=M, nObs=nObs,
                       observed_jumps=obs_jumps, T=T,
                       shape=(nObs))  # assumed: truncated call completed as in the parallel snippets below
def train_pymc3(docs_te, docs_tr, n_samples_te, n_samples_tr, n_words,
                n_topics, n_tokens):
    """
    Return: Pymc3 LDA results

    Parameters:
        docs_tr: training documents (processed)
        docs_te: testing documents (processed)
        n_samples_te: number of testing docs
        n_samples_tr: number of training docs
        n_words: size of vocabulary
        n_topics: number of topics to learn
        n_tokens: number of non-zero datapoints in processed training tf matrix
    """
    # Log-likelihood of documents for LDA
    def logp_lda_doc(beta, theta):
        """
        Returns the log-likelihood function for given documents.

        K : number of topics in the model
        V : number of words (size of vocabulary)
        D : number of documents (in a mini-batch)

        Parameters
        ----------
        beta : tensor (K x V)
            Word distribution.
        theta : tensor (D x K)
            Topic distributions for the documents.
        """
        def ll_docs_f(docs):
            dixs, vixs = docs.nonzero()
            vfreqs = docs[dixs, vixs]
            ll_docs = vfreqs * pmmath.logsumexp(
                tt.log(theta[dixs]) + tt.log(beta.T[vixs]), axis=1).ravel()

            # Per-word log-likelihood times no. of tokens in the whole dataset
            return tt.sum(ll_docs) / (tt.sum(vfreqs) + 1e-9) * n_tokens

        return ll_docs_f

    # fit the pymc3 LDA
    # we have a sparse dataset; it's better to have a dense batch so that all words occur there
    minibatch_size = 128

    # defining minibatch
    doc_t_minibatch = pm.Minibatch(docs_tr.toarray(), minibatch_size)
    doc_t = shared(docs_tr.toarray()[:minibatch_size])

    with pm.Model() as model:
        theta = Dirichlet('theta',
                          a=pm.floatX((1.0 / n_topics) *
                                      np.ones((minibatch_size, n_topics))),
                          shape=(minibatch_size, n_topics),
                          transform=t_stick_breaking(1e-9),
                          # do not forget scaling
                          total_size=n_samples_tr)
        beta = Dirichlet('beta',
                         a=pm.floatX((1.0 / n_topics) *
                                     np.ones((n_topics, n_words))),
                         shape=(n_topics, n_words),
                         transform=t_stick_breaking(1e-9))
        # Note that we defined the likelihood with scaling, so here we need no additional `total_size` kwarg
        doc = pm.DensityDist('doc', logp_lda_doc(beta, theta), observed=doc_t)

    # Encoder
    class LDAEncoder:
        """Encode (term-frequency) document vectors to variational means and (log-transformed) stds.
""" def __init__(self, n_words, n_hidden, n_topics, p_corruption=0, random_seed=1): rng = np.random.RandomState(random_seed) self.n_words = n_words self.n_hidden = n_hidden self.n_topics = n_topics self.w0 = shared(0.01 * rng.randn(n_words, n_hidden).ravel(), name='w0') self.b0 = shared(0.01 * rng.randn(n_hidden), name='b0') self.w1 = shared(0.01 * rng.randn(n_hidden, 2 * (n_topics - 1)).ravel(), name='w1') self.b1 = shared(0.01 * rng.randn(2 * (n_topics - 1)), name='b1') self.rng = MRG_RandomStreams(seed=random_seed) self.p_corruption = p_corruption def encode(self, xs): if 0 < self.p_corruption: dixs, vixs = xs.nonzero() mask = tt.set_subtensor( tt.zeros_like(xs)[dixs, vixs], self.rng.binomial(size=dixs.shape, n=1, p=1 - self.p_corruption)) xs_ = xs * mask else: xs_ = xs w0 = self.w0.reshape((self.n_words, self.n_hidden)) w1 = self.w1.reshape((self.n_hidden, 2 * (self.n_topics - 1))) hs = tt.tanh(xs_.dot(w0) + self.b0) zs = hs.dot(w1) + self.b1 zs_mean = zs[:, :(self.n_topics - 1)] zs_rho = zs[:, (self.n_topics - 1):] return {'mu': zs_mean, 'rho': zs_rho} def get_params(self): return [self.w0, self.b0, self.w1, self.b1] # call Encoder encoder = LDAEncoder(n_words=n_words, n_hidden=100, n_topics=n_topics, p_corruption=0.0) local_RVs = OrderedDict([(theta, encoder.encode(doc_t))]) # get parameters encoder_params = encoder.get_params() # Train pymc3 Model η = .1 s = shared(η) def reduce_rate(a, h, i): s.set_value(η / ((i / minibatch_size) + 1)**.7) with model: approx = pm.MeanField(local_rv=local_RVs) approx.scale_cost_to_minibatch = False inference = pm.KLqp(approx) inference.fit(10000, callbacks=[reduce_rate], obj_optimizer=pm.sgd(learning_rate=s), more_obj_params=encoder_params, total_grad_norm_constraint=200, more_replacements={doc_t: doc_t_minibatch}) # Extracting characteristic words doc_t.set_value(docs_tr.toarray()) samples = pm.sample_approx(approx, draws=100) beta_pymc3 = samples['beta'].mean(axis=0) # Predictive distribution def calc_pp(ws, thetas, beta, wix): """ Parameters ---------- ws: ndarray (N,) Number of times the held-out word appeared in N documents. thetas: ndarray, shape=(N, K) Topic distributions for N documents. beta: ndarray, shape=(K, V) Word distributions for K topics. wix: int Index of the held-out word Return ------ Log probability of held-out words. """ return ws * np.log(thetas.dot(beta[:, wix])) def eval_lda(transform, beta, docs_te, wixs): """Evaluate LDA model by log predictive probability. Parameters ---------- transform: Python function Transform document vectors to posterior mean of topic proportions. wixs: iterable of int Word indices to be held-out. 
""" lpss = [] docs_ = deepcopy(docs_te) thetass = [] wss = [] total_words = 0 for wix in wixs: ws = docs_te[:, wix].ravel() if 0 < ws.sum(): # Hold-out docs_[:, wix] = 0 # Topic distributions thetas = transform(docs_) # Predictive log probability lpss.append(calc_pp(ws, thetas, beta, wix)) docs_[:, wix] = ws thetass.append(thetas) wss.append(ws) total_words += ws.sum() else: thetass.append(None) wss.append(None) # Log-probability lp = np.sum(np.hstack(lpss)) / total_words return {'lp': lp, 'thetass': thetass, 'beta': beta, 'wss': wss} inp = tt.matrix(dtype='int64') sample_vi_theta = theano.function([inp], approx.sample_node( approx.model.theta, 100, more_replacements={ doc_t: inp }).mean(0)) def transform_pymc3(docs): return sample_vi_theta(docs) result_pymc3 = eval_lda(transform_pymc3, beta_pymc3, docs_te.toarray(), np.arange(100)) print('Predictive log prob (pm3) = {}'.format(result_pymc3['lp'])) return result_pymc3
def test_mixture_of_mixture(self):
    nbr = 4
    with Model() as model:
        # mixtures components
        g_comp = Normal.dist(
            mu=Exponential('mu_g', lam=1.0, shape=nbr, transform=None),
            sigma=1, shape=nbr)
        l_comp = Lognormal.dist(
            mu=Exponential('mu_l', lam=1.0, shape=nbr, transform=None),
            sigma=1, shape=nbr)
        # weight vector for the mixtures
        g_w = Dirichlet('g_w', a=floatX(np.ones(nbr) * 0.0000001), transform=None)
        l_w = Dirichlet('l_w', a=floatX(np.ones(nbr) * 0.0000001), transform=None)
        # mixture components
        g_mix = Mixture.dist(w=g_w, comp_dists=g_comp)
        l_mix = Mixture.dist(w=l_w, comp_dists=l_comp)
        # mixture of mixtures
        mix_w = Dirichlet('mix_w', a=floatX(np.ones(2)), transform=None)
        mix = Mixture('mix', w=mix_w, comp_dists=[g_mix, l_mix],
                      observed=np.exp(self.norm_x))

    test_point = model.test_point

    def mixmixlogp(value, point):
        priorlogp = (st.dirichlet.logpdf(x=point['g_w'],
                                         alpha=np.ones(nbr) * 0.0000001) +
                     st.expon.logpdf(x=point['mu_g']).sum() +
                     st.dirichlet.logpdf(x=point['l_w'],
                                         alpha=np.ones(nbr) * 0.0000001) +
                     st.expon.logpdf(x=point['mu_l']).sum() +
                     st.dirichlet.logpdf(x=point['mix_w'], alpha=np.ones(2)))
        complogp1 = st.norm.logpdf(x=value, loc=point['mu_g'])
        mixlogp1 = logsumexp(np.log(point['g_w']) + complogp1,
                             axis=-1, keepdims=True)
        complogp2 = st.lognorm.logpdf(value, 1., 0., np.exp(point['mu_l']))
        mixlogp2 = logsumexp(np.log(point['l_w']) + complogp2,
                             axis=-1, keepdims=True)
        complogp_mix = np.concatenate((mixlogp1, mixlogp2), axis=1)
        mixmixlogpg = logsumexp(np.log(point['mix_w']) + complogp_mix,
                                axis=-1, keepdims=True)
        return priorlogp, mixmixlogpg

    value = np.exp(self.norm_x)[:, None]
    priorlogp, mixmixlogpg = mixmixlogp(value, test_point)

    # check logp of mixture
    assert_allclose(mixmixlogpg, mix.logp_elemwise(test_point))

    # check model logp
    assert_allclose(priorlogp + mixmixlogpg.sum(), model.logp(test_point))

    # check input and check logp again
    test_point['g_w'] = np.asarray([.1, .1, .2, .6])
    test_point['mu_g'] = np.exp(np.random.randn(nbr))
    priorlogp, mixmixlogpg = mixmixlogp(value, test_point)
    assert_allclose(mixmixlogpg, mix.logp_elemwise(test_point))
    assert_allclose(priorlogp + mixmixlogpg.sum(), model.logp(test_point))
def setUp(self):
    # test Claims
    N = 100       # Number of patients
    M = 6         # Number of hidden states
    K = 10        # Number of comorbidities
    D = 721       # Number of claims
    Dd = 80       # Maximum number of claims that can occur at once
    min_obs = 10  # Minimum number of observed claims per patient
    max_obs = 30  # Maximum number of observed claims per patient
    self.M = M
    self.N = N
    self.K = K

    # Load pre-generated data
    from pickle import load
    T = load(open('../../data/X_layer_100_patients_old/T.pkl', 'rb'))
    self.T = T
    obs_jumps = load(open('../../data/X_layer_100_patients_old/obs_jumps.pkl', 'rb'))
    S_start = load(open('../../data/X_layer_100_patients_old/S.pkl', 'rb'))
    X_start = load(open('../../data/X_layer_100_patients_old/X.pkl', 'rb'))
    Z_start = load(open('../../data/X_layer_100_patients_old/Z.pkl', 'rb'))
    L_start = load(open('../../data/X_layer_100_patients_old/L.pkl', 'rb'))
    O = load(open('../../data/X_layer_100_patients_old/O_input.pkl', 'rb'))

    self.nObs = nObs = T.sum()
    self.zeroIndices = np.roll(self.T.cumsum(), 1)
    self.zeroIndices[0] = 0
    obs_jumps = np.hstack([np.zeros((N, 1), dtype='int8'), obs_jumps])
    obs_jumps = np.concatenate([obs_jumps[i, 0:T[i]] for i in range(N)])
    O = np.concatenate([O[:, 0:T[i], i].T for i in range(N)])
    S_start = np.concatenate([S_start[i, 0:T[i]] for i in range(N)])
    X_start = np.concatenate([X_start[:, 0:T[i], i].T for i in range(N)])

    anchors = []
    self.Z_original = Z_start.copy()  # assumed: the original line was the bare expression `self.Z_original`
    mask = np.ones((K, D))
    for anchor in anchors:
        for hold in anchor[1]:
            mask[:, hold] = 0
            mask[anchor[0], hold] = 1
    Z_start = Z_start[mask.nonzero()]

    with Model() as self.model:
        self.pi = Dirichlet('pi',
                            a=as_tensor_variable([0.5, 0.5, 0.5, 0.5, 0.5, 0.5]),
                            shape=M)
        pi_min_potential = Potential('pi_min_potential',
                                     TT.switch(TT.min(self.pi) < .1, -np.inf, 0))
        self.Q = DiscreteObsMJP_unif_prior('Q', M=M, lower=0.0, upper=1.0,
                                           shape=(M, M))
        self.S = DiscreteObsMJP('S', pi=self.pi, Q=self.Q, M=M, nObs=nObs,
                                observed_jumps=obs_jumps, T=T, shape=(nObs))
        self.B0 = Beta('B0', alpha=1., beta=1., shape=(K, M))
        self.B = Beta('B', alpha=1., beta=1., shape=(K, M))
        self.X = Comorbidities('X', S=self.S, B0=self.B0, B=self.B, T=T,
                               shape=(nObs, K))
        # self.Z = Beta('Z', alpha=0.1, beta=1., shape=(K, D))
        self.Z = Beta_with_anchors('Z', anchors=anchors, K=K, D=D,
                                   alpha=0.1, beta=1., shape=(K, D))
        self.L = Beta('L', alpha=1., beta=1., shape=D)
        self.testClaims = Claims('O_obs', X=self.X, Z=self.Z, L=self.L, T=T,
                                 D=D, O_input=O, shape=(nObs, Dd), observed=O)

        self.forS = ForwardS(vars=[self.S], N=N, T=T, nObs=nObs,
                             observed_jumps=obs_jumps)
        self.forX = ForwardX(vars=[self.X], N=N, T=T, K=K, D=D, Dd=Dd, O=O,
                             nObs=nObs)

    from scipy.special import logit
    self.Q_raw_log = logit(
        np.array([0.631921, 0.229485, 0.450538, 0.206042, 0.609582]))

    B_lo = logit(np.array(
        [[0.000001, 0.760000, 0.720000, 0.570000, 0.700000, 0.610000],
         [0.000001, 0.460000, 0.390000, 0.220000, 0.200000, 0.140000],
         [0.000001, 0.620000, 0.620000, 0.440000, 0.390000, 0.240000],
         [0.000001, 0.270000, 0.210000, 0.170000, 0.190000, 0.070000],
         [0.000001, 0.490000, 0.340000, 0.220000, 0.160000, 0.090000],
         [0.000001, 0.620000, 0.340000, 0.320000, 0.240000, 0.120000],
         [0.000001, 0.550000, 0.390000, 0.320000, 0.290000, 0.150000],
         [0.000001, 0.420000, 0.240000, 0.170000, 0.170000, 0.110000],
         [0.000001, 0.310000, 0.300000, 0.230000, 0.190000, 0.110000],
         [0.000001, 0.470000, 0.340000, 0.190000, 0.190000, 0.110000]]))
    B0_lo = logit(np.array(
        [[0.410412, 0.410412, 0.418293, 0.418293, 0.429890, 0.429890],
         [0.240983, 0.240983, 0.240983, 0.240983, 0.240983, 0.240983],
         [0.339714, 0.339714, 0.339714, 0.339714, 0.339714, 0.339714],
         [0.130415, 0.130415, 0.130415, 0.130415, 0.130415, 0.130415],
         [0.143260, 0.143260, 0.143260, 0.143260, 0.143260, 0.143260],
         [0.211465, 0.211465, 0.211465, 0.211465, 0.211465, 0.211465],
         [0.194187, 0.194187, 0.194187, 0.194187, 0.194187, 0.194187],
         [0.185422, 0.185422, 0.185422, 0.185422, 0.185422, 0.185422],
         [0.171973, 0.171973, 0.171973, 0.171973, 0.171973, 0.171973],
         [0.152277, 0.152277, 0.152277, 0.152277, 0.152277, 0.152277]]))
    Z_lo = logit(Z_start)
    L_lo = logit(L_start)
    # import pdb; pdb.set_trace()
    self.myTestPoint = {
        'Q_ratematrixoneway': self.Q_raw_log,
        'B_logodds': B_lo,
        'B0_logodds': B0_lo,
        'S': S_start,
        'X': X_start,
        'Z_anchoredbeta': Z_lo,
        'L_logodds': L_lo,
        'pi_stickbreaking': np.array([0.5, 0.5, 0.5, 0.5, 0.5, 0.5])
    }
def test_normal_mixture_nd(self, nd, ncomp):
    nd = to_tuple(nd)
    ncomp = int(ncomp)
    comp_shape = nd + (ncomp,)
    test_mus = np.random.randn(*comp_shape)
    test_taus = np.random.gamma(1, 1, size=comp_shape)
    observed = generate_normal_mixture_data(w=np.ones(ncomp) / ncomp,
                                            mu=test_mus,
                                            sd=1 / np.sqrt(test_taus),
                                            size=10)

    with Model() as model0:
        mus = Normal("mus", shape=comp_shape)
        taus = Gamma("taus", alpha=1, beta=1, shape=comp_shape)
        ws = Dirichlet("ws", np.ones(ncomp), shape=(ncomp,))
        mixture0 = NormalMixture("m", w=ws, mu=mus, tau=taus, shape=nd,
                                 comp_shape=comp_shape)
        obs0 = NormalMixture("obs", w=ws, mu=mus, tau=taus, shape=nd,
                             comp_shape=comp_shape, observed=observed)

    with Model() as model1:
        mus = Normal("mus", shape=comp_shape)
        taus = Gamma("taus", alpha=1, beta=1, shape=comp_shape)
        ws = Dirichlet("ws", np.ones(ncomp), shape=(ncomp,))
        comp_dist = [Normal.dist(mu=mus[..., i], tau=taus[..., i], shape=nd)
                     for i in range(ncomp)]
        mixture1 = Mixture("m", w=ws, comp_dists=comp_dist, shape=nd)
        obs1 = Mixture("obs", w=ws, comp_dists=comp_dist, shape=nd,
                       observed=observed)

    with Model() as model2:
        # Expected to fail if comp_shape is not provided,
        # nd is multidim and it does not broadcast with ncomp. If by chance
        # it does broadcast, an error is raised if the mixture is given
        # observed data.
        # Furthermore, the Mixture will also raise errors when the observed
        # data is multidimensional but it does not broadcast well with
        # comp_dists.
        mus = Normal("mus", shape=comp_shape)
        taus = Gamma("taus", alpha=1, beta=1, shape=comp_shape)
        ws = Dirichlet("ws", np.ones(ncomp), shape=(ncomp,))
        if len(nd) > 1:
            if nd[-1] != ncomp:
                with pytest.raises(ValueError):
                    NormalMixture("m", w=ws, mu=mus, tau=taus, shape=nd)
                mixture2 = None
            else:
                mixture2 = NormalMixture("m", w=ws, mu=mus, tau=taus, shape=nd)
        else:
            mixture2 = NormalMixture("m", w=ws, mu=mus, tau=taus, shape=nd)
        observed_fails = False
        if len(nd) >= 1 and nd != (1,):
            try:
                np.broadcast(np.empty(comp_shape), observed)
            except Exception:
                observed_fails = True
        if observed_fails:
            with pytest.raises(ValueError):
                NormalMixture("obs", w=ws, mu=mus, tau=taus, shape=nd,
                              observed=observed)
            obs2 = None
        else:
            obs2 = NormalMixture("obs", w=ws, mu=mus, tau=taus, shape=nd,
                                 observed=observed)

    testpoint = model0.test_point
    testpoint["mus"] = test_mus
    testpoint["taus"] = test_taus
    assert_allclose(model0.logp(testpoint), model1.logp(testpoint))
    assert_allclose(mixture0.logp(testpoint), mixture1.logp(testpoint))
    assert_allclose(obs0.logp(testpoint), obs1.logp(testpoint))
    if mixture2 is not None and obs2 is not None:
        assert_allclose(model0.logp(testpoint), model2.logp(testpoint))
    if mixture2 is not None:
        assert_allclose(mixture0.logp(testpoint), mixture2.logp(testpoint))
    if obs2 is not None:
        assert_allclose(obs0.logp(testpoint), obs2.logp(testpoint))
def run_normal_mv_model_prior(data, K=3, mus=None, mc_samples=10000, jobs=1,
                              n_cols=10, n_rows=100, neigs=1):
    n_samples, n_feats = data.shape
    n_samples = n_cols * n_rows
    max_neigs = 4 * neigs * (neigs + 1)
    # print(max_neigs)
    to_fill = indxs_neigs(range(n_samples), n_cols=n_cols, n_rows=n_rows, n=neigs)
    inds = np.where(to_fill != -1)[0]
    to_fill = to_fill[to_fill != -1]
    aux = tt.ones(n_samples * max_neigs) * -69

    with pm.Model() as model:
        packed_L = pm.LKJCholeskyCov('packed_L', n=n_feats, eta=2.,
                                     sd_dist=pm.HalfCauchy.dist(2.5))
        L = pm.expand_packed_triangular(n_feats, packed_L)
        sigma = pm.Deterministic('Sigma', L.dot(L.T))

        mus = 0. if mus is None else mus
        mus = pm.Normal('mus',
                        mu=[[10, 10], [55, 55], [105, 105], [155, 155], [205, 205]],
                        sd=10, shape=(K, n_feats))
        # sds = pm.HalfNormal('sds', sd=50, shape=(K, n_feats))
        # mus = pm.Normal('mus', mu=[10, 55, 105, 155, 205], sd=sds, shape=(K, n_feats))
        # nu = pm.Exponential('nu', 1. / 10, shape=(K, n_feats), testval=tt.ones((K, n_feats)))
        # mus = pm.StudentT('mus', nu=nu, mu=[[10], [55], [105], [155], [205]], sd=100., shape=(K, n_feats))

        pi = Dirichlet('pi', a=pm.floatX([1. for _ in range(K)]), shape=K)
        # TODO one pi per voxel
        category = pm.Categorical('category', p=pi, shape=n_samples)

        # pm.Deterministic('pri', tt.as_tensor_variable(get_prior2(category)))
        # prior = pm.Deterministic('prior', tt.stack(
        #     [tt.sum(tt.eq(category[i], category[indxs_neig(i, n_rows=73, n_cols=74)])) / 8.0
        #      for i in range(73 * 74)]))
        # prior = pm.Deterministic('prior', tt.sum(tt.eq(
        #     category, category[[j for j in range(8)]].reshape((8, 1)))))

        aux2 = tt.set_subtensor(aux[inds], category[to_fill])
        prior = pm.Deterministic(
            'prior',
            (tt.sum(tt.eq(aux2.reshape((n_samples, max_neigs)),
                          category.reshape((n_samples, 1))),
                    axis=1) + 0.0) / 8.0)
        # prior2 = pm.Normal('prior2', mu=prior, sd=0.5, shape=n_samples)
        # aux3 = tt.as_tensor_variable(pm.floatX([1, 1, 2, 2, 2, 2, 2, 2, 2, 2] * 100))
        # aux3 = tt.set_subtensor(aux3[(tt.eq(category, 1)).nonzero()], 2)
        # prior2 = pm.Deterministic('prior2', aux3)

        xs = DensityDist('x', logp_gmix(mus[category], L, prior, category),
                         observed=data)

    with model:
        step2 = pm.ElemwiseCategorical(vars=[category], values=range(K))
        # step = pm.CategoricalGibbsMetropolis(vars=[prior])
        trace = sample(mc_samples, step=[step2], n_jobs=jobs, tune=600)

    pm.traceplot(trace, varnames=['mus', 'pi', 'Sigma'])
    plt.title('normal mv model 40 cols')
    mod = stats.mode(trace['category'][int(mc_samples * 0.75):])
    # if chains > 1:
    #     print(max(np.max(gr_stats) for gr_stats in pm.gelman_rubin(trace).values()))
    return model, mod, trace
with pm.Model() as model:
    # Prior for covariance matrix
    # packed_L = [pm.LKJCholeskyCov('packedL_%d' % i, n=dimensions, eta=1.,
    #                               sd_dist=pm.Gamma.dist(mu=2, sigma=1))
    #             for i in range(n_comp)]
    # L = [pm.expand_packed_triangular(dimensions, packed_L[i]) for i in range(n_comp)]
    # Σ = [pm.Deterministic('Σ_%d' % i, L[i].dot(L[i].T)) for i in range(n_comp)]
    packed_L = pm.LKJCholeskyCov('packedL', n=dimensions, eta=1.,
                                 sd_dist=pm.Gamma.dist(mu=2, sigma=1))
    L = pm.expand_packed_triangular(dimensions, packed_L)
    Σ = pm.Deterministic('Σ', L.dot(L.T))

    # Prior for mean:
    mus = [MvNormal('mu_%d' % i, mu=pm.floatX(np.zeros(dimensions)),
                    tau=pm.floatX(0.1 * np.eye(2)), shape=(dimensions,))
           for i in range(n_comp)]

    # Prior for weights:
    pi = Dirichlet('pi', a=pm.floatX(concentration * np.ones(n_comp)),
                   shape=(n_comp,))

    prior = sample_prior()
    x = pm.DensityDist('x', logp_gmix(mus, pi, np.eye(2)), observed=data)

# Plot prior for some parameters:
# print(prior.keys())
# plt.hist(prior['Σ'][:, 0, 1])

with model:
    %time hmc_trace = pm.sample(draws=250, tune=100, cores=4)

with model:
    %time fit_advi = pm.fit(n=50000, obj_optimizer=pm.adagrad(learning_rate=1e-1), method='advi')

advi_elbo = pd.DataFrame(
    {'log-ELBO': -np.log(fit_advi.hist),
def run_lda(args):
    tf_vectorizer, docs_tr, docs_te = prepare_sparse_matrix_nonlabel(
        args.n_tr, args.n_te, args.n_word)
    feature_names = tf_vectorizer.get_feature_names()
    doc_tr_minibatch = pm.Minibatch(docs_tr.toarray(), args.bsz)
    doc_tr = shared(docs_tr.toarray()[:args.bsz])

    def log_prob(beta, theta):
        """Returns the log-likelihood function for given documents.

        K : number of topics in the model
        V : number of words (size of vocabulary)
        D : number of documents (in a mini-batch)

        Parameters
        ----------
        beta : tensor (K x V)
            Word distributions.
        theta : tensor (D x K)
            Topic distributions for documents.
        """
        def ll_docs_f(docs):
            dixs, vixs = docs.nonzero()
            vfreqs = docs[dixs, vixs]
            ll_docs = (vfreqs * pmmath.logsumexp(
                tt.log(theta[dixs]) + tt.log(beta.T[vixs]), axis=1).ravel())
            return tt.sum(ll_docs) / (tt.sum(vfreqs) + 1e-9)

        return ll_docs_f

    with pm.Model() as model:
        beta = Dirichlet("beta",
                         a=pm.floatX((1. / args.n_topic) *
                                     np.ones((args.n_topic, args.n_word))),
                         shape=(args.n_topic, args.n_word))
        theta = Dirichlet("theta",
                          a=pm.floatX((10. / args.n_topic) *
                                      np.ones((args.bsz, args.n_topic))),
                          shape=(args.bsz, args.n_topic),
                          total_size=args.n_tr)
        doc = pm.DensityDist("doc", log_prob(beta, theta), observed=doc_tr)

    encoder = ThetaEncoder(n_words=args.n_word, n_hidden=100,
                           n_topics=args.n_topic)
    local_RVs = OrderedDict([(theta, encoder.encode(doc_tr))])
    encoder_params = encoder.get_params()

    s = shared(args.lr)

    def reduce_rate(a, h, i):
        s.set_value(args.lr / ((i / args.bsz) + 1) ** 0.7)

    with model:
        approx = pm.MeanField(local_rv=local_RVs)
        approx.scale_cost_to_minibatch = False
        inference = pm.KLqp(approx)
        inference.fit(args.n_iter,
                      callbacks=[reduce_rate,
                                 pm.callbacks.CheckParametersConvergence(diff="absolute")],
                      obj_optimizer=pm.adam(learning_rate=s),
                      more_obj_params=encoder_params,
                      total_grad_norm_constraint=200,
                      more_replacements={doc_tr: doc_tr_minibatch})

    doc_tr.set_value(docs_tr.toarray())
    inp = tt.matrix(dtype="int64")
    sample_vi_theta = theano.function(
        [inp],
        approx.sample_node(approx.model.theta, args.n_sample,
                           more_replacements={doc_tr: inp}))

    test = docs_te.toarray()
    test_n = test.sum(1)

    beta_pymc3 = pm.sample_approx(approx, draws=args.n_sample)['beta']
    theta_pymc3 = sample_vi_theta(test)

    assert beta_pymc3.shape == (args.n_sample, args.n_topic, args.n_word)
    assert theta_pymc3.shape == (args.n_sample, args.n_te, args.n_topic)

    beta_mean = beta_pymc3.mean(0)
    theta_mean = theta_pymc3.mean(0)
    pred_rate = theta_mean.dot(beta_mean)
    pp_test = (test * np.log(pred_rate)).sum(1) / test_n

    posteriors = {'theta': theta_pymc3, 'beta': beta_pymc3}

    log_top_words(beta_pymc3.mean(0), feature_names, n_top_words=args.n_top_word)
    save_elbo(approx.hist)
    save_pp(pp_test)
    save_draws(posteriors)
df = pd.read_csv("../data/data-lda.txt")
n_person = len(df["PersonID"].unique())
n_item = 120
K = 6
IDs = df.values[:, 0].astype(np.int32) - 1
Items = df.values[:, 1].astype(np.int32) - 1

# Below there is an operation that indexes a random variable of size
# (number of classes) with a data-length vector of class assignments; see
# https://pymc-devs.github.io/pymc3/notebooks/GLM-hierarchical.html
# for a detailed explanation.
basic_model = Model()
with basic_model:
    # prior distribution [50, 6]
    theta = Dirichlet('p_theta', a=(1.0 / K) * np.ones(K), shape=(n_person, K))
    # prior distribution [6, 112]
    phi = Dirichlet('p_phi', a=(1.0 / n_item) * np.ones(n_item),
                    shape=(K, n_item))

    # likelihood
    # number of data points x per-datum category-probability vector [1117, 6]
    theta = theta[IDs, :]
    # number of data points x per-ID item-probability vector [1117, 112]
    person_to_item = tt.dot(theta, phi)
    H = Categorical("tes", p=person_to_item, shape=(1117), observed=Items)

## sampling
# There are many parameters and sampling is too heavy to run locally,
# so the number of samples is kept quite small.
X.append(np.random.normal(U[i], S[i] * np.eye(ndims), Y[i]))

X_obs = np.concatenate((X[0], X[1], X[2]), 0)
# print(X_obs)
print(C)
print(U)

plt.plot(X_obs[:], np.ones(X_obs.shape), 'o', markersize=8)
plt.show()

# Infer class labels
from pymc3 import Dirichlet, Normal, MvNormal, HalfNormal, Categorical
import theano.tensor

with Model() as gmm:
    C = Dirichlet('mixture_coeff', dirichlet_scale * dirichlet_shape,
                  shape=nclusters)
    S = HalfNormal('S', sd=sd_halfnormal, shape=nclusters)
    U = Normal('mu', mu=mean_prior_mean, sd=mean_prior_sd, shape=nclusters)
    Y = Categorical('labels', p=C, shape=nsamples)
    X = Normal('X', mu=U[Y], sd=S[Y], observed=X_obs)

from pymc3 import find_MAP
map_estimate = find_MAP(model=gmm)
print(map_estimate)

from pymc3 import NUTS, sample, Slice, Metropolis, ElemwiseCategorical, HamiltonianMC

modified_map_estimate = copy.deepcopy(map_estimate)
modified_map_estimate['mu'] = [
    1 if x < 0.001 else x for x in modified_map_estimate['mu']
]
# define model mixture log-likelihood
def logp_mix(mf):
    def logp_(value):
        logps = tt.log(mf) + value
        return tt.sum(logsumexp(logps, axis=1))

    return logp_

# define and fit the probabilistic model
with Model() as model:
    tau = HalfCauchy('tau', beta=1.)
    mf = Dirichlet('mf', a=tt.ones(M) / tau, shape=(M,))
    xs = DensityDist('logml', logp_mix(mf), observed=LME)

with model:
    approx = fit(method='advi', n=10000)
    trace = approx.sample(nsample)

traceplot(trace);

# compute exceedance probability
ep, _ = np.histogram(trace['mf'].argmax(axis=1), bins=M)
ep = pd.DataFrame({'ep': ep / nsample, 'models': cols})

fig = plt.figure(figsize=(10, 5))
ax1 = plt.subplot(121)
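# In logp_mix above, each row of the observed LME matrix holds per-model log
# evidences, so the objective is sum_n log sum_m mf_m * exp(LME_nm).
# A standalone NumPy sketch of the same quantity, with hypothetical numbers:
import numpy as np
from scipy.special import logsumexp

# hypothetical log model evidences: one row per subject, one column per model
LME_demo = np.array([[-10.0, -12.0, -11.5],
                     [-20.0, -19.0, -21.0]])
mf_demo = np.array([0.5, 0.3, 0.2])  # candidate model frequencies

# same quantity as logp_mix(mf)(LME): worked in log space for stability
logp = logsumexp(np.log(mf_demo) + LME_demo, axis=1).sum()
print(logp)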
def test_mixture_of_mixture(self):
    if theano.config.floatX == "float32":
        rtol = 1e-4
    else:
        rtol = 1e-7
    nbr = 4
    with Model() as model:
        # mixtures components
        g_comp = Normal.dist(
            mu=Exponential("mu_g", lam=1.0, shape=nbr, transform=None),
            sigma=1, shape=nbr)
        l_comp = Lognormal.dist(
            mu=Exponential("mu_l", lam=1.0, shape=nbr, transform=None),
            sigma=1, shape=nbr)
        # weight vector for the mixtures
        g_w = Dirichlet("g_w", a=floatX(np.ones(nbr) * 0.0000001),
                        transform=None, shape=(nbr,))
        l_w = Dirichlet("l_w", a=floatX(np.ones(nbr) * 0.0000001),
                        transform=None, shape=(nbr,))
        # mixture components
        g_mix = Mixture.dist(w=g_w, comp_dists=g_comp)
        l_mix = Mixture.dist(w=l_w, comp_dists=l_comp)
        # mixture of mixtures
        mix_w = Dirichlet("mix_w", a=floatX(np.ones(2)), transform=None,
                          shape=(2,))
        mix = Mixture("mix", w=mix_w, comp_dists=[g_mix, l_mix],
                      observed=np.exp(self.norm_x))

    test_point = model.test_point

    def mixmixlogp(value, point):
        floatX = theano.config.floatX
        priorlogp = (
            st.dirichlet.logpdf(x=point["g_w"],
                                alpha=np.ones(nbr) * 0.0000001).astype(floatX)
            + st.expon.logpdf(x=point["mu_g"]).sum(dtype=floatX)
            + st.dirichlet.logpdf(x=point["l_w"],
                                  alpha=np.ones(nbr) * 0.0000001).astype(floatX)
            + st.expon.logpdf(x=point["mu_l"]).sum(dtype=floatX)
            + st.dirichlet.logpdf(x=point["mix_w"],
                                  alpha=np.ones(2)).astype(floatX))
        complogp1 = st.norm.logpdf(x=value, loc=point["mu_g"]).astype(floatX)
        mixlogp1 = logsumexp(np.log(point["g_w"]).astype(floatX) + complogp1,
                             axis=-1, keepdims=True)
        complogp2 = st.lognorm.logpdf(value, 1.0, 0.0,
                                      np.exp(point["mu_l"])).astype(floatX)
        mixlogp2 = logsumexp(np.log(point["l_w"]).astype(floatX) + complogp2,
                             axis=-1, keepdims=True)
        complogp_mix = np.concatenate((mixlogp1, mixlogp2), axis=1)
        mixmixlogpg = logsumexp(np.log(point["mix_w"]).astype(floatX) + complogp_mix,
                                axis=-1, keepdims=False)
        return priorlogp, mixmixlogpg

    value = np.exp(self.norm_x)[:, None]
    priorlogp, mixmixlogpg = mixmixlogp(value, test_point)

    # check logp of mixture
    assert_allclose(mixmixlogpg, mix.logp_elemwise(test_point), rtol=rtol)

    # check model logp
    assert_allclose(priorlogp + mixmixlogpg.sum(), model.logp(test_point),
                    rtol=rtol)

    # check input and check logp again
    test_point["g_w"] = np.asarray([0.1, 0.1, 0.2, 0.6])
    test_point["mu_g"] = np.exp(np.random.randn(nbr))
    priorlogp, mixmixlogpg = mixmixlogp(value, test_point)
    assert_allclose(mixmixlogpg, mix.logp_elemwise(test_point), rtol=rtol)
    assert_allclose(priorlogp + mixmixlogpg.sum(), model.logp(test_point),
                    rtol=rtol)
# Log likelihood of Gaussian mixture distribution
def logp_gmix(mus, pi, taus, n_components):
    def logp_(value):
        logps = [tt.log(pi[i]) + logp_normal(mus[i, :], taus[i], value)
                 for i in range(n_components)]
        return tt.sum(logsumexp(tt.stacklists(logps)[:, :n_samples], axis=0))

    return logp_

## Prior for model:
componentMean = ms + np.random.uniform(0, 5, n_dimensions)
componentTau = np.random.uniform(0, 2, n_dimensions) * np.eye(n_dimensions)

with pm.Model() as model:
    mus = MvNormal('mu', mu=pm.floatX(componentMean),
                   tau=pm.floatX(componentTau),
                   shape=(n_components, n_dimensions))
    pi = Dirichlet('pi', a=pm.floatX(0.1 * np.ones(n_components)),
                   shape=(n_components,))
    packed_L = [pm.LKJCholeskyCov('packed_L_%d' % i, n=n_dimensions, eta=2.,
                                  sd_dist=pm.HalfCauchy.dist(2.5))
                for i in range(n_components)]
    L = [pm.expand_packed_triangular(n_dimensions, packed_L[i])
         for i in range(n_components)]
    sigmas = [pm.Deterministic('sigma_%d' % i, tt.dot(L[i], L[i].T))
              for i in range(n_components)]
    taus = [tt.nlinalg.matrix_inverse(sigmas[i]) for i in range(n_components)]
    xs = DensityDist('x', logp_gmix(mus, pi, taus, n_components), observed=data)

with model:
    advi_fit = pm.fit(n=500000, obj_optimizer=pm.adagrad(learning_rate=1e-1))

advi_trace = advi_fit.sample(10000)
advi_summary = pm.summary(advi_trace)

pickle_out = open("advi_summary.pickle", "wb")
pickle.dump(advi_summary, pickle_out)
pickle_out.close()
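# logp_normal is defined elsewhere in this file; for reference, the
# precision-parameterized multivariate normal log-density it presumably
# implements is log N(x; mu, T^{-1}) = (1/2)(log|T| - k log 2pi - (x-mu)' T (x-mu)).
# A standalone NumPy sketch with hypothetical inputs:
import numpy as np

def logp_normal_np(mu, tau, value):
    # log N(value; mu, tau^{-1}) with tau the precision matrix
    k = tau.shape[0]
    delta = value - mu
    return 0.5 * (np.linalg.slogdet(tau)[1] - k * np.log(2 * np.pi)
                  - delta @ tau @ delta)

mu = np.zeros(2)
tau = np.eye(2) * 0.5   # hypothetical precision matrix
x = np.array([1.0, -1.0])
print(logp_normal_np(mu, tau, x))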
X_start = X_start[0:nObs]
O = O[0:nObs]

nObs = S_start.shape[0]
N = T.shape[0]          # Number of patients
M = pi_start.shape[0]   # Number of hidden states
K = Z_start.shape[0]    # Number of comorbidities
D = Z_start.shape[1]    # Number of claims
Dd = 16                 # Maximum number of claims that can occur at once

# import pdb; pdb.set_trace()
model = Model()
with model:
    # Fails:
    # pi = Dirichlet('pi', a=as_tensor_variable([0.147026, 0.102571, 0.239819, 0.188710, 0.267137, 0.054738]),
    #                shape=M, testval=np.ones(M) / float(M))
    pi = Dirichlet('pi', a=as_tensor_variable(pi_start.copy()), shape=M)
    pi_min_potential = Potential('pi_min_potential',
                                 TT.switch(TT.min(pi) < .001, -np.inf, 0))
    Q = DiscreteObsMJP_unif_prior('Q', M=M, lower=0.0, upper=1.0, shape=(M, M))
    # S = DiscreteObsMJP('S', pi=pi, Q=Q, M=M, nObs=nObs, observed_jumps=obs_jumps,
    #                    T=T, shape=(nObs), testval=np.ones(nObs, dtype='int32'))
    S = DiscreteObsMJP('S', pi=pi, Q=Q, M=M, nObs=nObs,
                       observed_jumps=obs_jumps, T=T, shape=(nObs))
plt.figure(figsize=(5, 5))
plt.scatter(data[:, 0], data[:, 1], c='g', alpha=0.5)
plt.scatter(ms[0, 0], ms[0, 1], c='r', s=100)
plt.scatter(ms[1, 0], ms[1, 1], c='b', s=100)

from pymc3.math import logsumexp

# Model original
with pm.Model() as model:
    mus = [MvNormal('mu_%d' % i, mu=pm.floatX(np.zeros(2)),
                    tau=pm.floatX(0.1 * np.eye(2)), shape=(2,))
           for i in range(2)]
    pi = Dirichlet('pi', a=pm.floatX(0.1 * np.ones(2)), shape=(2,))
    xs = DensityDist('x', logp_gmix(mus, pi, np.eye(2)), observed=data)

# # Model for GMM clustering
# with pm.Model() as model:
#     # cluster sizes
#     p = pm.Dirichlet('p', a=np.array([1., 1.]), shape=2)
#     # ensure all clusters have some points
#     p_min_potential = pm.Potential('p_min_potential',
#                                    tt.switch(tt.min(p) < .1, -np.inf, 0))
#
#     # cluster centers
#     means = [MvNormal('mu_%d' % i, mu=pm.floatX(np.zeros(2)),
#                       tau=pm.floatX(0.1 * np.eye(2)), shape=(2,))
#              for i in range(2)]
def logp_gmix(mus, pi, taus, n_components):
    def logp_(value):
        logps = [tt.log(pi[i]) + logp_normal(mus[i, :], taus[i], value)
                 for i in range(n_components)]
        return tt.sum(logsumexp(tt.stacklists(logps)[:, :n_samples], axis=0))

    return logp_

# Sparse model with diagonal covariance:
with pm.Model() as model:
    # Weights of each component:
    w = Dirichlet('w', a=pm.floatX(alpha), shape=(n_components,))

    # Impose sparse structure onto the mean, with off-diagonal elements all
    # being the same, because the background should be the same throughout.
    mus_signal = MvNormal('mus_signal',
                          mu=pm.floatX(signalMean_priorMean),
                          tau=pm.floatX(np.eye(n_dimensions) / signalMean_priorSD**2),
                          shape=n_dimensions)
    mus_background = MvNormal('mus_background',
                              mu=pm.floatX(backgroundMean_priorMean),
                              tau=pm.floatX(np.eye(n_dimensions) / backgroundMean_priorSD**2),
                              shape=n_dimensions)
    mus = tt.fill_diagonal(
        tt.reshape(tt.tile(mus_background, n_components),
def setUp(self):
    # test Claims
    N = 5   # Number of patients
    self.N = N
    M = 3   # Number of hidden states
    self.M = M
    K = 2   # Number of comorbidities
    D = 20  # Number of claims
    Dd = 4  # Maximum number of claims that can occur at once
    min_obs = 2  # Minimum number of observed claims per patient
    max_obs = 4  # Maximum number of observed claims per patient

    # obs_jumps = np.ones((N, max_obs - 1))
    obs_jumps = np.array([[1, 1, 1], [1, 1, 1], [1, 1, 1],
                          [1, 1, 1], [1, 1, 1]])
    T = np.array([4, 2, 3, 4, 2])
    self.T = T
    nObs = T.sum()
    obs_jumps = np.hstack([np.zeros((N, 1), dtype='int8'), obs_jumps])
    obs_jumps = np.concatenate([obs_jumps[i, 0:T[i]] for i in range(N)])

    # O(4,4,5)
    # O = np.zeros((nObs, Dd), dtype='int8')
    O = np.zeros((Dd, max_obs, N), dtype='int8')
    # import pdb; pdb.set_trace()
    O[[0, 1, 3, 2, 3, 3], [0, 1, 3, 2, 3, 3], [0, 1, 4, 3, 3, 4]] = 1
    # O[[0, 5, 11, 12], [0, 1, 2, 3]] = 1
    O = np.concatenate([O[:, 0:T[i], i].T for i in range(N)])

    # (K, D) array with every entry -2.30258509 (the original wrote it out in full)
    Z_lo = np.full((K, D), -2.30258509)

    anchors = []
    mask = np.ones((K, D))
    for anchor in anchors:
        for hold in anchor[1]:
            mask[:, hold] = 0
            mask[anchor[0], hold] = 1
    Z_lo = Z_lo[mask.nonzero()]

    with Model() as self.model:
        self.pi = Dirichlet('pi', a=as_tensor_variable([0.5, 0.5, 0.5]), shape=M)
        pi_min_potential = Potential('pi_min_potential',
                                     TT.switch(TT.min(self.pi) < .1, -np.inf, 0))
        self.Q = DiscreteObsMJP_unif_prior('Q', M=M, lower=0.0, upper=1.0,
                                           shape=(M, M))
        self.S = DiscreteObsMJP('S', pi=self.pi, Q=self.Q, M=M, nObs=nObs,
                                observed_jumps=obs_jumps, T=T, shape=(nObs))
        self.B0 = Beta('B0', alpha=1., beta=1., shape=(K, M))
        self.B = Beta('B', alpha=1., beta=1., shape=(K, M))
        self.X = Comorbidities('X', S=self.S, B0=self.B0, B=self.B, T=T,
                               shape=(nObs, K))
        # self.Z = Beta('Z', alpha=0.1, beta=1., shape=(K, D))
        self.Z = Beta_with_anchors('Z', anchors=anchors, K=K, D=D,
                                   alpha=0.1, beta=1., shape=(K, D))
        self.L = Beta('L', alpha=1., beta=1., shape=D)
        # L = Beta('L', alpha=0.1, beta=1, shape=D, transform=None)
        # L = Uniform('L', left=0.0, right=1.0, shape=D, transform=None)
        # L = Uniform('L', lower=0.0, upper=1.0, shape=D)
        self.testClaims = Claims('O_obs', X=self.X, Z=self.Z, L=self.L, T=T,
                                 D=D, O_input=O, shape=(nObs, Dd), observed=O)

        self.forS = ForwardS(vars=[self.S], N=N, T=T, nObs=nObs,
                             observed_jumps=obs_jumps)
        self.forX = ForwardX(vars=[self.X], N=N, T=T, K=K, D=D, Dd=Dd, O=O,
                             nObs=nObs)

    self.myTestPoint = {
        'Z_anchoredbeta': Z_lo,
        'Q_ratematrixoneway': np.array([0.1, 0.1]),
        'pi_stickbreaking': np.array([0.2, 0.1]),
        'S': np.array([[0, 0, 1, 1], [1, 1, 1, 1], [1, 1, 2, 2],
                       [0, 2, 2, 2], [0, 0, 0, 1]], dtype=np.int32),
        'B0_logodds': np.array([[0., 1., 0.], [0., 0., 1.]]),
        'X': np.array([[[0, 1, 1, 1, 1], [0, 1, 1, 1, 1],
                        [1, 1, 1, 1, 1], [1, 1, 1, 1, 1]],
                       [[1, 1, 0, 0, 1], [1, 1, 0, 1, 1],
                        [1, 1, 1, 1, 1], [1, 1, 1, 1, 1]]], dtype=np.int8),
        'L_logodds': np.array([0.1, 0.1, 0.1, 0.1, 0.01, 0.01, 0.01, 0.01,
                               0.0011, 0.0011, 0.0011, 0.0011, 0.0011, 0.,
                               0.0101, 0.0101, 0.0101, 0.01, 0.01, 0.01]),
        'B_logodds': np.array([[1., 0., 1.], [0., 1., 0.]])
    }
    self.myTestPoint['S'] = np.concatenate(
        [self.myTestPoint['S'][i, 0:T[i]] for i in range(N)])
    self.myTestPoint['X'] = np.concatenate(
        [self.myTestPoint['X'][:, 0:T[i], i].T for i in range(N)])

    stepX_Correct = np.array([[[0, 0, 0, 0, 0], [0, 0, 0, 0, 0],
                               [0, 0, 0, 0, 0], [0, 0, 0, 0, 1]],
                              [[0, 0, 0, 0, 0], [0, 0, 0, 0, 0],
                               [1, 0, 0, 0, 0], [1, 0, 0, 0, 0]]],
                             dtype=np.int8)
    stepX_Correct = np.array([[[0, 0, 0, 0, 0], [0, 0, 0, 1, 0],
                               [0, 0, 0, 1, 0], [0, 0, 0, 1, 0]],
                              [[0, 1, 0, 0, 0], [0, 1, 0, 0, 0],
                               [0, 1, 0, 0, 0], [0, 1, 0, 0, 1]]],
                             dtype=np.int8)