def __init__(self, X, cctypes, distargs, n_grid=30, Zv=None, Zrcv=None, hypers=None, seed=None):
    """
    cc_state constructor

    input arguments:
    -- X: a list of numpy data columns. The number of rows is taken from
    the length of the first column.
    -- cctypes: a list of strings where each entry is the data type for
    each column.
    -- distargs: a list of distargs appropriate for each type in cctypes.
    For details on distargs see the documentation for each data type.

    optional arguments:
    -- n_grid: number of bins for hyperparameter grids. Default = 30.
    -- Zv: The assignment of columns to views. If not specified, a
    partition is generated randomly with utils.crp_gen.
    -- Zrcv: The assignment of rows to clusters for each view. Requires
    Zv; must hold max(Zv)+1 entries, the first of length n_rows.
    -- hypers: per-column hyperparameters; hypers[d] is handed to
    set_hypers of the d-th dim. Default = None (dims keep their own).
    -- seed: seeds both `random` and `numpy.random`. Default = None, in
    which case neither generator is reseeded here.

    example:
    >>> import numpy
    >>> n_rows = 100
    >>> X = [numpy.random.normal(size=n_rows), numpy.random.normal(size=n_rows)]
    >>> State = cc_state(X, ['normal', 'normal'], [None, None])
    """
    if seed is not None:
        random.seed(seed)
        numpy.random.seed(seed)

    self.n_rows = len(X[0])
    self.n_cols = len(X)
    self.n_grid = n_grid

    # Build one dim per column; the uncollapsed types get their own class.
    self.dims = []
    for col in range(self.n_cols):
        column_data = X[col]
        type_name = cctypes[col]
        dim_builder = cc_dim_uc if _is_uncollapsed[type_name] else cc_dim
        self.dims.append(
            dim_builder(
                column_data,
                _cctype_class[type_name],
                col,
                n_grid=n_grid,
                distargs=distargs[col],
            )
        )

    # Caller-supplied hyperparameters replace whatever the dims started with.
    if hypers is not None:
        for d, dim in enumerate(self.dims):
            dim.set_hypers(hypers[d])

    # CRP concentration over views: pick one grid point at random.
    self.alpha_grid = utils.log_linspace(1.0/self.n_cols, self.n_cols, self.n_grid)
    self.alpha = random.choice(self.alpha_grid)

    assert len(self.dims) == self.n_cols

    # A row partition only makes sense alongside a matching column partition.
    if Zrcv is not None:
        assert Zv is not None
        assert len(Zv) == self.n_cols
        assert len(Zrcv) == max(Zv)+1
        assert len(Zrcv[0]) == self.n_rows

    # Column-to-view partition: use the given one or draw one from the CRP.
    if Zv is not None:
        Nv = utils.bincount(Zv)
        V = len(Nv)
    else:
        Zv, Nv, V = utils.crp_gen(self.n_cols, self.alpha)

    # One cc_view per view, holding the dims assigned to it.
    self.views = []
    for view in range(V):
        members = [self.dims[i] for i in range(self.n_cols) if Zv[i] == view]
        if Zrcv is not None:
            new_view = cc_view(members, Z=numpy.array(Zrcv[view]), n_grid=n_grid)
        else:
            new_view = cc_view(members, n_grid=n_grid)
        self.views.append(new_view)

    self.Zv = numpy.array(Zv)
    self.Nv = Nv
    self.V = V
def __transition_columns_kernel_uncollapsed(self, col, m=3, append=False):
    """Gibbs with auxiliary parameters for uncollapsed data types.

    Scores the column's dim under every existing view and, unless the
    column is alone in its view, under `m` freshly constructed candidate
    views; then samples a destination with utils.log_pflip and updates
    the state accordingly.

    arguments:
    -- col: index of the column to transition. Overridden with
    n_cols-1 when append is True.
    -- m: number of auxiliary (proposal) views. Default = 3.
    -- append: if True, the column is never treated as a singleton, the
    view counts are used unmodified, and the result is handed to
    __append_new_dim_to_view instead of the move/create/destroy helpers.
    """
    if append:
        col = self.n_cols-1

    # Starting view of the column.
    v_a = self.Zv[col]

    # CRP weights per existing view. On the regular (non-append) path the
    # column is removed from its own view first; a singleton view instead
    # gets weight alpha, and no extra singleton proposals are made below.
    crp_weights = list(self.Nv)
    if append:
        is_singleton = False
    else:
        is_singleton = (self.Nv[v_a] == 1)
        if is_singleton:
            crp_weights[v_a] = self.alpha
        else:
            crp_weights[v_a] -= 1
    log_crp = numpy.log(numpy.array(crp_weights))

    dim = self.dims[col]
    candidates = []   # candidate dim for each option, indexed like `scores`
    scores = []       # unnormalized log probability of each option

    # Existing views: the current view reuses the dim as is, every other
    # view gets a deep copy reassigned to that view's row partition.
    for v in range(self.V):
        if v == v_a:
            candidate = dim
        else:
            candidate = copy.deepcopy(dim)
            candidate.reassign(self.views[v].Z)
        candidates.append(candidate)
        scores.append(candidate.full_marginal_logp()+log_crp[v])

    # Auxiliary views: the singleton CRP mass alpha is split evenly m ways.
    if not is_singleton:
        log_aux = log(self.alpha/float(m))
        proposal_views = []
        for _ in range(m):
            candidate = copy.deepcopy(dim)
            candidates.append(candidate)
            # NOTE: the name `proposal_view` is deliberately left bound after
            # the loop; the append path below falls back on its last value.
            proposal_view = cc_view([candidate], n_grid=self.n_grid)
            proposal_views.append(proposal_view)
            candidate.reassign(proposal_view.Z)
            scores.append(candidate.full_marginal_logp()+log_aux)

    # Sample the destination; indices >= self.V refer to auxiliary views.
    v_b = utils.log_pflip(scores)
    newdim = candidates[v_b]
    self.dims[dim.index] = newdim

    if append:
        if v_b >= self.V:
            index = v_b-self.V
            assert 0 <= index < m
            proposal_view = proposal_views[index]
        # When an existing view was drawn, `proposal_view` is still the last
        # proposal built above and is passed through unchanged.
        self.__append_new_dim_to_view(newdim, v_b, proposal_view, is_uncollapsed=True)
        return

    # Nothing to clean up when the column stays where it was.
    if v_b == v_a:
        return

    if is_singleton:
        # No auxiliary views were proposed, so the target must already exist.
        assert v_b < self.V
        self.__destroy_singleton_view(newdim, v_a, v_b, is_uncollapsed=True)
    elif v_b >= self.V:
        index = v_b-self.V
        assert 0 <= index < m
        proposal_view = proposal_views[index]
        self.__create_singleton_view(newdim, v_a, proposal_view, is_uncollapsed=True)
    else:
        self.__move_dim_to_view(newdim, v_a, v_b, is_uncollapsed=True)