def test_perfect_separation_of_latents():
    """Reconstruct the rating matrix from synthetic latent factors via NMF.

    Currently a smoke test: it exercises generate_w_h() and
    NMF.inverse_transform() end to end but makes no assertion on the
    reconstructed matrix.
    """
    profiles = data()
    user_factors, item_factors, _mapping = generate_w_h(profiles, n_users=100)
    model = NMF(solver='mu', init='custom', n_components=3)
    # Inject the pre-built item factors instead of fitting.
    model.components_ = item_factors
    model.n_components_ = item_factors.shape[0]
    reconstructed = model.inverse_transform(user_factors)
def sci_nmf(self, components=2, procedure=None, separate=False, max_iter=1000, w_init=False, h_init=None):
    """Perform non-negative matrix factorization of self.array (X = W * H).

    Parameters
    ----------
    components : int
        Number of latent components (rank of the factorization).
    procedure : str or None
        Initialization method; one of the `init` options of
        sklearn.decomposition.NMF:
        https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.NMF.html
    separate : bool
        If False, return W @ H wrapped as a new mat_opr carrying this
        object's row/column labels; if True, return the tuple (W, H).
    max_iter : int
        Maximum number of solver iterations.
    w_init : bool
        sklearn only allows fixing H in X = W * H.  To fix W instead, set
        w_init=True and pass W^T as `h_init`; the problem is then solved as
        X^T = H^T * W^T and the factors are transposed back.
    h_init : ndarray or None
        Fixed H factor (or W^T when w_init is True).  When None, both
        factors are fitted from scratch.

    Returns
    -------
    mat_opr or (ndarray, ndarray)
        Depending on `separate`.
    """
    arr = np.array(self.array)
    if h_init is None:
        # Full factorization: let sklearn fit both W and H.
        # BUGFIX: max_iter was previously hard-coded to 1000, silently
        # ignoring the max_iter argument.
        model = NMF(n_components=components, init=procedure, random_state=0,
                    max_iter=max_iter, tol=1e-10)
        W = model.fit_transform(arr)
        H = model.components_
    else:
        # One factor is fixed: mark the model as "fitted" by seeding
        # components_/n_components_, then solve only for the other factor.
        model = NMF(n_components=components, init=None, max_iter=max_iter, tol=1e-10)
        model.n_components_ = components
        model.components_ = h_init
        if w_init:
            # h_init actually holds W^T: transform X^T against it, then
            # transpose both factors back into X = W * H orientation.
            H_transpose = model.transform(X=np.transpose(arr))
            W = np.transpose(model.components_)
            H = np.transpose(H_transpose)
        else:
            W = model.transform(X=arr)
            H = model.components_
    if not separate:
        # Recompose the low-rank approximation and preserve the original
        # row/column labels on the new object.
        tor = mat_opr(pd.DataFrame(np.dot(W, H)))
        tor.dataframe.columns = self.dataframe.columns
        tor.dataframe.index = self.dataframe.index
        return tor
    return W, H
def generate_array_data_file():
    """Print a generated C# source file embedding the rating matrix and NMF factors.

    Builds synthetic user/item factors, reconstructs the full rating matrix,
    blanks out the player's (last) row, and prints the data as a
    MonsterMatchArrayData C# class.
    """
    profiles = data()
    user_count = 100
    user_factors, item_factors, _mapping = generate_w_h(
        profiles, n_users=user_count, use_random=True, sigma=.5, mean=-1.5)

    model = NMF(solver='mu', init='custom', n_components=3)
    model.components_ = item_factors
    model.n_components_ = item_factors.shape[0]
    ratings = model.inverse_transform(user_factors)
    # inverse_transform returns a shape of n_users+1.  Row `user_count` is the
    # player; wipe these ratings since they are not yet determined.
    ratings[user_count, :] = np.nan

    template = dedent('''
    namespace MonsterMatch.CollaborativeFiltering
    {
        public static class MonsterMatchArrayData
        {
            // @formatter:off
            public const int UserCount = %d;
            public const int ItemCount = %d;
            public const int PlayerUserId = %d;
            public const int FactorCount = %d;
            public static readonly int[] ForProfiles = %s;
            public static readonly double[,] Data = %s;
            public static readonly double[,] Pu = %s;
            public static readonly double[,] Qi = %s;
            // @formatter:on
        }
    }
    ''')
    filled = template % (
        user_count + 1,
        len(profiles),
        user_count,
        model.n_components_,
        'new [] {' + ','.join(str(profile.index) for profile in profiles) + '}',
        csharp_repr_ndarray(ratings),
        csharp_repr_ndarray(user_factors),
        csharp_repr_ndarray(item_factors.T))
    print(filled)
def main(argvs):
    """Annotate one chromosome slice with NMF state loadings.

    Expected argv layout:
        [1] process_id   [2] state_n (K)   [3] chromosome
        [4] start_index  [5] end_index     [6] resolution_size
        [7] annotation_result_dir

    Reads one resolution file per assay, arcsinh-transforms the signals in
    windows of up to 100 bins, projects each window through a pre-trained
    NMF model, and writes per-bin state loadings to
    <annotation_result_dir>/<chromosome>_<process_id>.bed.
    Returns 0 on completion.
    """
    process_id = int(argvs[1])
    state_n = int(argvs[2])
    chromosome = argvs[3]
    start_index = int(argvs[4])
    end_index = int(argvs[5])
    resolution_size = int(argvs[6])
    annotation_result_dir = argvs[7]
    print(process_id, state_n, chromosome, start_index, end_index,
          resolution_size, annotation_result_dir)
    # E = number of assays (features); K = number of NMF states.
    E = len(configs.chromHMM_assay)
    assay_list = configs.chromHMM_assay
    K = state_n
    window_size = 100
    # load nmf model: construct an NMF shell, then restore the fitted
    # components from the saved training parameters so transform() works.
    nmf = NMF(n_components=K, init='random', random_state=0)
    with open(os.path.join(script_dir,
                           "../train/param_{}.json".format(K))) as param_f:
        param = json.load(param_f)
        nmf.components_ = np.array(param['components_'])
        nmf.n_components_ = param['n_components_']
    if not os.path.exists(annotation_result_dir):
        try:
            os.makedirs(annotation_result_dir)
        except (FileExistsError):
            # Another process may have created the directory between the
            # exists() check and makedirs(); that is fine.
            pass
    annotation_result_file = os.path.join(
        annotation_result_dir, chromosome + "_" + str(process_id) + ".bed")
    with open(annotation_result_file, 'w') as annotation_f:
        # start annotation: open one per-assay resolution file.
        # NOTE(review): these handles are never closed — consider an
        # ExitStack / try-finally so they are released on error.
        resol_fs = []
        for assay in assay_list:
            resol_fs.append(
                open(
                    os.path.join(
                        configs.blacklist_rm_data_path,
                        assay + "/resolution-" + str(resolution_size) + "bp/",
                        chromosome + ".bed"), "r"))
        # read in genome data, one window of up to window_size bins at a time.
        done = False
        while not done:
            data_array = [[] for i in range(E)]
            start_annotation_index = None
            for i in range(E):
                resol_f = resol_fs[i]
                g = 0  # bins collected for this assay in the current window
                while g < window_size:
                    line = resol_f.readline()
                    if not line:
                        # End of file reached before end_index.
                        done = True
                        # raise(Exception("Reach end of file, wrong End index."))
                        break
                    # Each line: "<bin_index> <signal>".
                    inf = line.strip('\n').split()
                    index = int(inf[0])
                    signal = float(inf[1])
                    if index < start_index:
                        # Skip bins before this process's assigned range.
                        continue
                    if start_annotation_index is None:
                        start_annotation_index = index
                    # arcsinh variance-stabilizes the raw signal.
                    signal = np.arcsinh(signal)
                    data_array[i].append(signal)
                    print(index)
                    print("append" + assay_list[i] + ":" + str(index))
                    g += 1
                    if index >= end_index:
                        done = True
                        break
            # currently skip the data when it is too small
            # a better implementation is to fetch the data from former position
            # test if data array lines up: every assay must have produced the
            # same number of bins for this window.
            is_lined_up = True
            for myarray in data_array:
                if len(myarray) != len(data_array[0]):
                    is_lined_up = False
                    for array in data_array:
                        print(len(array))
                    break
            # NOTE(review): `g` here is the count from the LAST assay's loop;
            # with is_lined_up it matches all assays — confirm intended.
            if g >= window_size / 2 and is_lined_up:
                # NOTE(review): dtype=np.int8 truncates the arcsinh-transformed
                # float signals to integers — confirm this cast is intended.
                x_m = np.asarray(data_array, dtype=np.int8)
                result = nmf.transform(x_m.T)  # shape (G,E)
                index = start_annotation_index
                print(result.shape)
                # Emit one BED-style line per bin: "start end k1 k2 ... kK".
                for i in range(g):
                    print("{} {} ".format(index * resolution_size,
                                          (index + 1) * resolution_size),
                          end="", file=annotation_f)
                    for k in range(K):
                        print("{} ".format(result[i, k]),
                              end="", file=annotation_f)
                    print(file=annotation_f)
                    index += 1
    return 0