def make_H(self, a_active_H=10, b_active_H=0.1, n=100):
    """
    Make the real coefficient matrix H.

    a_active_H: scalar, shape parameter of Gamma distribution.
    b_active_H: scalar, rate parameter of Gamma distribution.
    n: scalar, number of examples for each class; or a tuple/list giving
       the number of examples of every class.

    Returns (H, n): H is the K times N coefficient matrix and n is the
    label vector of the N samples (also stored as self.classes).
    """
    self.a_active_H = a_active_H
    self.b_active_H = b_active_H

    # A scalar n is replicated once per view/class.
    if isinstance(n, (tuple, list)):
        self.n_list = n
    else:
        self.n_list = [n] * self.V
    # e.g. (2,3,4,3) to [0,0,1,1,1,2,2,2,2,3,3,3]
    self.n = cl.factor_sizes_to_factor_labels(self.n_list, start=0)
    self.N = len(self.n)  # number of samples

    # Class membership matrix (N times V), transposed and topped with an
    # all-ones row, giving a V+1 times N boolean mask.
    membership, _ = cl.membership_vector_to_indicator_matrix(self.n)
    self.C = numpy.vstack((numpy.ones(shape=(1, self.N), dtype=int),
                           numpy.transpose(membership)))
    self.C = numpy.asarray(self.C, dtype=bool)

    # Gamma-distributed rates; only the active entries of C are kept.
    # 1.0/... avoids integer (floor) division under Python 2 when an
    # integer rate parameter is passed.
    rates = self.rng.gamma(shape=self.a_active_H,
                           scale=1.0 / self.b_active_H,
                           size=(self.V + 1, self.N))
    self.Lambda_H = numpy.zeros(shape=(self.V + 1, self.N), dtype=float)
    self.Lambda_H[self.C] = rates[self.C]
    self.Lambda_H_ext = numpy.dot(self.Z, self.Lambda_H)  # K times N

    # Active (factor, sample) pairs. One masked draw samples them in
    # row-major order, the same order as a k-then-sample double loop.
    active = numpy.asarray(numpy.dot(self.Z, self.C), dtype=bool)
    self.H = numpy.zeros(shape=(self.K, self.N), dtype=float)
    self.H[active] = self.rng.exponential(
        scale=1.0 / self.Lambda_H_ext[active])

    self.classes = self.n
    return self.H, self.n
def make_W(self, a_active_W=10, b_active_W=1000):
    """
    Make the real basis matrix W.

    a_active_W: scalar, shape parameter of Gamma distribution.
    b_active_W: scalar, rate parameter of Gamma distribution.

    Returns (W, features): W is the M times K basis matrix and features is
    the list of M feature names of the form "<binary pattern>_<index>".
    """
    self.a_active_W = a_active_W
    self.b_active_W = b_active_W

    # All binary codes over the V+1 views; each code is one feature pattern.
    self.ubc = unique_binary_code.unique_binary_code(self.V + 1)
    self.ubc.generate_binary_code()
    self.s_str = utility.convert_each_row_of_matrix_to_a_string(self.ubc.s, sep="")
    self.num_patterns = len(self.s_str)
    self.M = self.m * self.num_patterns

    self.W = numpy.zeros(shape=(self.M, self.K), dtype=float)
    self.Lambda_W = numpy.zeros(shape=(self.M, self.V + 1), dtype=float)
    # 1.0/... is required: with the integer default b_active_W=1000, the
    # expression 1/b_active_W is 0 under Python 2 integer division.
    ls = self.rng.gamma(shape=self.a_active_W,
                        scale=1.0 / self.b_active_W,
                        size=(self.M, self.V + 1))

    # m features per pattern: expand the pattern codes to one row per feature.
    mp = cl.factor_sizes_to_factor_labels([self.m] * self.num_patterns)
    MP, _ = cl.membership_vector_to_indicator_matrix(mp)
    self.S = numpy.asarray(numpy.dot(MP, self.ubc.s), dtype=bool)  # M times V+1
    self.Lambda_W[self.S] = ls[self.S]

    self.features = ["features"] * self.M  # names of features
    self.feature_patterns = ["feature_patterns"] * self.M  # pattern of features
    self.feature_patterns_matrix = numpy.zeros(shape=(self.M, self.V + 1), dtype=int)
    # [0,1,2,0,1,2,...]; list(...) keeps this valid where range is lazy.
    fs = list(range(self.m)) * self.num_patterns

    # Loop-invariant cast, done once instead of on every iteration.
    self.Z = numpy.asarray(self.Z, dtype=int)
    for i in range(self.M):
        code = numpy.asarray(self.S[i, :], dtype=int)  # length V+1
        # Factors (columns of W) belonging to the views active in this code.
        code_ext = numpy.asarray(self.Z.dot(code), dtype=bool)  # length K

        pattern = "".join(numpy.asarray(code, dtype=str))
        self.features[i] = pattern + "_" + str(fs[i])
        self.feature_patterns[i] = pattern
        self.feature_patterns_matrix[i, :] = code

        # One exponential draw per factor of every active view, in view order.
        draws = [self.rng.exponential(scale=1.0 / self.Lambda_W[i, v],
                                      size=self.z_list[v])
                 for v in range(self.V + 1) if self.S[i, v]]
        self.W[i, code_ext] = numpy.concatenate(draws) if draws else []

    return self.W, self.features
def make_H(self, a_active_H=10, b_active_H=0.1, n=100):
    """
    Make the real coefficient matrix H.

    a_active_H: scalar, shape parameter of Gamma distribution.
    b_active_H: scalar, rate parameter of Gamma distribution.
    n: scalar, number of examples for each class; a tuple or list is taken
       as the per-class numbers of examples.

    Returns (H, n): the K times N coefficient matrix and the label vector
    of the N samples (the latter is also kept in self.classes).
    """
    self.a_active_H = a_active_H
    self.b_active_H = b_active_H

    if isinstance(n, (tuple, list)):
        self.n_list = n
        # e.g. (2,3,4,3) to [0,0,1,1,1,2,2,2,2,3,3,3]
        self.n = cl.factor_sizes_to_factor_labels(n, start=0)
    else:
        self.n_list = [n] * (self.V)
        # e.g. 3 to [0,0,0,1,1,1,2,2,2,3,3,3]
        self.n = cl.factor_sizes_to_factor_labels(self.n_list, start=0)
    self.N = len(self.n)  # number of samples

    # class membership matrix, N times V
    self.C, _ = cl.membership_vector_to_indicator_matrix(self.n)
    # add an all-ones first row: V+1 times N
    self.C = numpy.vstack(
        (numpy.ones(shape=(1, self.N), dtype=int), numpy.transpose(self.C)))

    self.Lambda_H = numpy.zeros(shape=(self.V + 1, self.N), dtype=float)
    # Use 1.0/... so an integer rate is not floor-divided under Python 2.
    ls = self.rng.gamma(shape=self.a_active_H,
                        scale=1.0 / self.b_active_H,
                        size=(self.V + 1, self.N))
    self.C = numpy.asarray(self.C, dtype=bool)
    self.Lambda_H[self.C] = ls[self.C]
    self.Lambda_H_ext = numpy.dot(self.Z, self.Lambda_H)  # K times N

    # KN[k, j] is non-zero when factor k is active for sample j.
    KN = numpy.dot(self.Z, self.C)
    self.H = numpy.zeros(shape=(self.K, self.N), dtype=float)
    for k in range(self.K):
        # Index named j so the parameter n is not overwritten.
        for j in range(self.N):
            if KN[k, j]:
                self.H[k, j] = self.rng.exponential(
                    scale=1.0 / self.Lambda_H_ext[k, j], size=None)

    self.classes = self.n
    return self.H, self.n
def __init__(self, z=3, V=3, m=3, rng=numpy.random.RandomState(1000)):
    """
    z: integer, tuple, list, or numpy.ndarray, the number of hidden factors for each view;
       a scalar means every one of the V+1 views gets z factors.
    V: integer, number of views.
    m: integer, number of features for each pattern.
    rng: random state.
         NOTE: the default RandomState is created once, when this function is
         defined, so instances built without an explicit rng share it.
    """
    self.V = V
    self.m = m
    self.rng = rng

    # The docstring allows a numpy.ndarray of sizes; turn it into a list so it
    # follows the tuple/list path instead of being replicated like a scalar.
    # (A 0-d array becomes a plain scalar and takes the scalar path.)
    if isinstance(z, numpy.ndarray):
        z = z.tolist()

    if isinstance(z, (tuple, list)):
        self.z_list = z
    else:
        self.z_list = [z] * (self.V + 1)

    # Expand the per-view sizes into one label per factor.
    self.z = cl.factor_sizes_to_factor_labels(self.z_list)
    self.K = len(self.z)  # number of latent factors
    # binary, size K by V+1, self.Z[k,u]=1 indicates the k-th factor in class u.
    self.Z, self.z_unique = cl.membership_vector_to_indicator_matrix(self.z)
def __init__(self, z=3, V=3, m=3, rng=numpy.random.RandomState(1000)):
    """
    z: integer, tuple, list, or numpy.ndarray, the number of hidden factors for each view;
       if z is a scalar, each of the V+1 views has z factors.
    V: integer, number of views.
    m: integer, number of features for each pattern.
    rng: random state. The default object is built a single time at function
         definition, so it is shared by all instances that do not pass their own.
    """
    self.V = V
    self.m = m
    self.rng = rng

    if isinstance(z, numpy.ndarray):
        # An array of per-view sizes is documented as valid input; without
        # this conversion it would be treated as a scalar and replicated.
        z = z.tolist()

    if isinstance(z, (tuple, list)):
        self.z_list = z
        self.z = cl.factor_sizes_to_factor_labels(z)
    else:
        self.z_list = [z] * (self.V + 1)
        self.z = cl.factor_sizes_to_factor_labels(self.z_list)

    self.K = len(self.z)  # number of latent factors
    # binary, size K by V+1, self.Z[k,u]=1 indicates the k-th factor in class u.
    self.Z, self.z_unique = cl.membership_vector_to_indicator_matrix(self.z)
def stability_selection(self, z=3,
                        a_0s=[1e2, 1e1, 1, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 1e-1, 1e-2, 1e-3],
                        b_0s=[1e2, 1e1, 1, 1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-7, 1e-8, 1e-9, 1e-10],
                        a_larges=[1e2, 1e1, 1, 1e-1, 1e-2],
                        b_larges=[1e2, 1e1, 1, 1e-1, 1e-2],
                        a_small=1e2, b_small=1e-30, ab_tied=True, mean_W=None,
                        mean_H_large=1, mean_H_small=1e-32, num_samplings=1000, max_iter=200,
                        threshold_F=0.1, rank_method="Wilcoxon_rank_sum_test", maxbc=12,
                        key_feature_mean_feature_value_threshold=1,
                        key_feature_neglog10pval_threshold=10,
                        max_num_feature_each_pattern=3,
                        compute_variational_lower_bound=False,
                        variational_lower_bound_min_rate=1e-4,
                        if_plot_lower_bound=False, if_plot_heatmap=False,
                        a_H_test=0.1, b_H_test=1e-10, dir_save="./",
                        prefix="MCNMF_stability_selection", verb=False,
                        rng=numpy.random.RandomState(1000)):
    """
    SS-MV-NMF to obtain the empirical probability matrix.

    Runs MV-NMF num_samplings times, each time on a random half of the samples
    and with a randomly drawn hyper-parameter setting, then averages the results.

    INPUTS:
    z: integer, tuple of length V+1, list/numpy vector of size (K,), the labels of the factors (columns) in W.
       If z is a scalar, it means each view (including the ubi view) has z factors.
       If z is a tuple, e.g. (3,3,3,3), z[u] means the u-th view has z[u] factors.
       If z is a list or numpy vector, e.g. [-1,-1,-1,0,0,1,1,1,2,2,2] where -1 means ubi view,
       z[k] means the k-th factor has label z[k].
    a_0s, b_0s: list or numpy vector, the predefined sets of the shape and rate parameters of Gamma
       for generating lambda of the exponential distribution of W.
    a_larges, b_larges: list or numpy vector, the predefined sets of the shape and rate parameters of
       Gamma for generating lambda of the exponential distribution of non-zero H blocks.
    a_small, b_small: float scalar, shape and rate parameters of Gamma for generating lambda of the
       exponential distribution of zero H blocks.
    ab_tied: bool, whether tie rate b to a. If True, the b values given in the input of this function
       are disregarded, and b_0=mean_W*a_0, b_large=mean_H_large*a_large, b_small=mean_H_small*a_small.
    mean_W: float scalar, the estimated mean value of W; if None, set it to mean(X).
    mean_H_large: float scalar, the estimated mean value of non-zero blocks in H; if None, set it to 1.
    mean_H_small: float scalar, the estimated mean value of zero blocks in H; if None, set it to 1e-32.
    num_samplings: integer, the number of samplings, i.e. the number of independent runs of MV-NMF.
    max_iter: integer, the maximal number of iterations allowed in MV-NMF.
    threshold_F: positive float scalar, used to generate the feature activity indicator matrix:
       F = F_mean >= (threshold_F * mean(F_mean)).
    rank_method: string, the method to rank the features within a pattern, can be one of
       {"mean_basis_value","mean_feature_value","Wilcoxon_rank_sum_test"}.
    maxbc: positive integer, the maximal number of views allowed to generate all possible binary
       codes as feature patterns.
    key_feature_mean_feature_value_threshold: positive integer, the lowest limit of the mean feature
       values when selecting key features.
    key_feature_neglog10pval_threshold: positive integer, the lower limit of the negative log10(pval)
       when selecting key features.
    max_num_feature_each_pattern: positive integer, the maximal number of key features allowed to be
       selected in each pattern.
    compute_variational_lower_bound: bool, whether compute variational lower bounds.
    variational_lower_bound_min_rate: float, a tiny positive number, the threshold of the local mean
       change rate, below which the algorithm will terminate.
    if_plot_lower_bound: bool, whether plot the variational lower bound plot.
    if_plot_heatmap: bool, whether to call plot_heatmap of the fitted model after every run.
    a_H_test, b_H_test: scalars forwarded to learn_H_given_X_test_and_E_W for the final update of E_H
       (presumably Gamma shape and rate parameters for H -- confirm against mcnmf).
    dir_save: string, path, e.g. "/home/yifeng/research/mf/mvmf_v1_1/results/", path to save the
       lower bound plot.
    prefix: string, prefix of the saved file name.
    verb: bool, whether show the information for each iteration, including lower bound.
    rng: random number generator passed to the model. The default object is created once at
       function definition and is therefore shared between calls.

    OUTPUTS:
    E_W: numpy matrix, the expected basis matrix W (mean over all runs).
    E_H: numpy matrix, the expected coefficient matrix H.
    training_time: total time spent including time of computing lower bound.
    training_time_L: time spent only for computing lower bound.
    """
    if isinstance(z, (list, tuple, numpy.ndarray)):
        self.z_str = "".join(numpy.asarray(z, dtype=str))
    else:
        self.z_str = str(z)
    self.z = z
    self.a_0s = a_0s
    self.b_0s = b_0s
    self.a_larges = a_larges
    self.b_larges = b_larges
    self.a_small = a_small
    self.b_small = b_small
    self.max_iter = max_iter
    self.threshold_F = threshold_F
    self.compute_variational_lower_bound = compute_variational_lower_bound
    self.variational_lower_bound_min_rate = variational_lower_bound_min_rate
    self.if_plot_lower_bound = if_plot_lower_bound
    self.verb = verb
    self.training_times = []  # factorization time
    self.fs_times = []  # feature selection time
    self.training_times_L = []  # time computing lower bounds
    self.settings = []  # parameter settings
    self.E_W = 0
    self.L_W = 0
    self.F_mean = 0

    # Candidate settings (a_0, b_0, a_large, b_large) to sample from.
    all_combinations = []
    if ab_tied:
        if mean_W is None:
            self.mean_X = numpy.mean(self.X)
            self.mean_W = self.mean_X
        else:
            self.mean_W = mean_W
        self.mean_H_large = 1 if mean_H_large is None else mean_H_large
        self.mean_H_small = 1e-32 if mean_H_small is None else mean_H_small
        # Tie the rate of the zero blocks to its shape. This used to be
        # assigned to a misspelled name, so the tied value was never used.
        b_small = self.mean_H_small * a_small
        self.b_small = b_small
        for a_0 in a_0s:
            for a_large in a_larges:
                all_combinations.append(
                    (a_0, self.mean_W * a_0, a_large, self.mean_H_large * a_large))
    else:
        for a_0 in a_0s:
            for b_0 in b_0s:
                for a_large in a_larges:
                    for b_large in b_larges:
                        all_combinations.append((a_0, b_0, a_large, b_large))

    for ns in range(num_samplings):
        print("The {0}-th run of MV-NMF...".format(ns))
        rng_ns = numpy.random.RandomState(ns)  # per-run generator seeded by the run index

        # sample data: keep the samples assigned to fold 1 of a shuffled 2-fold split
        ind_cv = cl.kfold_cross_validation(self.y, k=2, shuffle=True, rng=rng_ns)
        ind_subsample = ind_cv == 1
        X = self.X[:, ind_subsample]
        y = self.y[ind_subsample]
        # reorder the sampled data
        X, y, _ = cl.sort_classes(numpy.transpose(X), y)
        X = numpy.transpose(X)

        # sample setting; choice() returns a length-1 array, which cannot index
        # a list on current NumPy, so take its single element as a plain int.
        ind_setting = int(
            rng_ns.choice(len(all_combinations), size=1, replace=False)[0])
        a_0, b_0, a_large, b_large = all_combinations[ind_setting]
        A = (a_large, a_small)
        B = (b_large, b_small)

        # current setting
        setting = ("a_0=" + str(a_0) + "_b_0=" + str(b_0)
                   + "_a_large=" + str(a_large) + "_b_large=" + str(b_large))
        self.settings.append(setting)
        print("The current setting: " + setting)

        # run the model
        self.model_fs = mcnmf.mcnmf(X, y, self.features)
        _, _, training_time, training_time_L = self.model_fs.factorize(
            z=z, a_0=a_0, b_0=b_0, A=A, B=B, max_iter=max_iter,
            compute_variational_lower_bound=compute_variational_lower_bound,
            variational_lower_bound_min_rate=variational_lower_bound_min_rate,
            if_plot_lower_bound=if_plot_lower_bound,
            dir_save=dir_save, prefix=prefix + "_" + setting, verb=verb, rng=rng)

        # feature selection, this procedure only need to have F, so no need to
        # run the scoring procedures
        _, fs_time = self.model_fs.sel_feat(
            called_by_ssmcnmf_loop=True, threshold_F=threshold_F,
            rank_method=rank_method, maxbc=maxbc,
            key_feature_mean_feature_value_threshold=key_feature_mean_feature_value_threshold,
            key_feature_neglog10pval_threshold=key_feature_neglog10pval_threshold,
            max_num_feature_each_pattern=max_num_feature_each_pattern,
            header=numpy.unique(self.y), rng=rng)

        # record time
        self.training_times.append(round(training_time, 2))
        self.fs_times.append(round(fs_time, 2))
        self.training_times_L.append(round(training_time_L, 2))

        # accumulate E_W, L_W and F over the runs
        self.E_W = self.E_W + self.model_fs.E_W
        self.L_W = self.L_W + self.model_fs.L_W
        self.F_mean = self.F_mean + self.model_fs.F

        if if_plot_heatmap:
            self.model_fs.plot_heatmap(
                dir_save, prefix + "_" + setting, pattern="All",
                rank_method="mean_basis_value",
                unique_class_names=numpy.unique(self.y),
                width=10, height=10, fontsize=6, fmt="png", colormap="hot")

    # get average result
    self.E_W = self.E_W / num_samplings
    self.L_W = self.L_W / num_samplings
    # get empirical probability
    self.F_mean = self.F_mean / num_samplings

    # update the corresponding variables in model_fs (the model of the last
    # run) so that it describes the full data set
    self.model_fs.E_W = self.E_W
    self.model_fs.L_W = self.L_W
    self.model_fs.X = self.X
    self.model_fs.y = self.y
    self.model_fs.Y, self.model_fs.y_unique = cl.membership_vector_to_indicator_matrix(self.y)
    self.model_fs.N = len(self.y)

    # final update E_H
    self.model_fs.learn_H_given_X_test_and_E_W(
        self.X, a_H_test=a_H_test, b_H_test=b_H_test, feature_selection=False,
        max_iter=max_iter,
        compute_variational_lower_bound=compute_variational_lower_bound,
        variational_lower_bound_min_rate=variational_lower_bound_min_rate,
        if_plot_lower_bound=if_plot_lower_bound,
        dir_save=dir_save, prefix=prefix + "_final_update_E_H", verb=verb, rng=rng)
    self.model_fs.E_H = self.model_fs.E_H_test
    self.model_fs.E_H_test = None
    self.E_H = self.model_fs.E_H

    # update time record: the final H update counts as training time
    self.model_fs.training_time = numpy.sum(self.training_times) + self.model_fs.test_time
    self.model_fs.training_time_L = numpy.sum(self.training_times_L) + self.model_fs.test_time_L
    self.model_fs.test_time = 0
    self.model_fs.test_time_L = 0

    # store settings
    self.settings = numpy.array(self.settings)
    print("Finished stability selection :)")
    return self.E_W, self.E_H, self.model_fs.training_time, self.model_fs.training_time_L
dtype=int, delimiter=",", skiprows=1) test_set_y = test_set_x[:, 0] test_set_x = test_set_x[:, 1:] test_set_x = test_set_x.transpose() # limit the number of training set #train_set_x=train_set_x[:,0:10000] #train_set_y=train_set_y[0:10000] num_train = train_set_x.shape[1] num_test = test_set_x.shape[1] # convert train_set_y to binary codes train_set_y01, z_unique = cl.membership_vector_to_indicator_matrix( z=train_set_y, z_unique=range(10)) train_set_y01 = train_set_y01.transpose() test_set_y01, _ = cl.membership_vector_to_indicator_matrix(z=test_set_y, z_unique=range(10)) test_set_y01 = test_set_y01.transpose() num_feat = train_set_x.shape[0] visible_type = "Bernoulli" hidden_type = "Gaussian" hidden_type_fixed_param = 1 rng = numpy.random.RandomState(100) M = num_feat normalization_method = "None" if visible_type == "Bernoulli": normalization_method = "scale"
def make_W(self, a_active_W=10, b_active_W=1000):
    """
    Make the real basis matrix W.

    a_active_W: scalar, shape parameter of Gamma distribution.
    b_active_W: scalar, rate parameter of Gamma distribution.

    Returns (W, features): the M times K basis matrix and the list of the
    M feature names, each built as "<binary pattern>_<index within pattern>".
    """
    self.a_active_W = a_active_W
    self.b_active_W = b_active_W

    # Enumerate the binary codes over the V+1 views (one per feature pattern).
    self.ubc = unique_binary_code.unique_binary_code(self.V + 1)
    self.ubc.generate_binary_code()
    self.s_str = utility.convert_each_row_of_matrix_to_a_string(self.ubc.s,
                                                                sep="")
    self.num_patterns = len(self.s_str)
    self.M = self.m * self.num_patterns

    self.W = numpy.zeros(shape=(self.M, self.K), dtype=float)
    self.Lambda_W = numpy.zeros(shape=(self.M, self.V + 1), dtype=float)
    # Float division on purpose: 1/1000 (the default rate) evaluates to 0
    # with Python 2 integer division, which would zero every rate.
    ls = self.rng.gamma(shape=self.a_active_W,
                        scale=1.0 / self.b_active_W,
                        size=(self.M, self.V + 1))

    # Repeat every pattern code for its m features.
    mp = cl.factor_sizes_to_factor_labels([self.m] * self.num_patterns)
    MP, _ = cl.membership_vector_to_indicator_matrix(mp)
    self.S = numpy.dot(MP, self.ubc.s)  # extend binary codes, M times V+1
    self.S = numpy.asarray(self.S, dtype=bool)
    self.Lambda_W[self.S] = ls[self.S]

    self.features = ["features"] * self.M  # names of features
    self.feature_patterns = ["feature_patterns"] * self.M  # pattern of features
    self.feature_patterns_matrix = numpy.zeros(shape=(self.M, self.V + 1),
                                               dtype=int)
    # [0,1,2,0,1,2,0,1,2,...,0,1,2]; a real list is needed for repetition.
    fs = list(range(0, self.m)) * self.num_patterns

    # The cast does not depend on the feature, so do it once up front.
    self.Z = numpy.asarray(self.Z, dtype=int)
    for i in range(self.M):
        code = numpy.asarray(self.S[i, :], dtype=int)  # length V+1
        # Boolean mask over the K factors owned by the active views.
        code_ext = numpy.asarray(self.Z.dot(code), dtype=bool)

        code_str = "".join(numpy.asarray(code, dtype=str))
        self.features[i] = code_str + "_" + str(fs[i])
        self.feature_patterns[i] = code_str
        self.feature_patterns_matrix[i, :] = code

        # Draw the basis values view by view for the active views only.
        pieces = []
        for v in range(self.V + 1):
            if self.S[i, v]:
                pieces.append(
                    self.rng.exponential(scale=1.0 / self.Lambda_W[i, v],
                                         size=self.z_list[v]))
        self.W[i, code_ext] = numpy.concatenate(pieces) if pieces else []

    return self.W, self.features