def run_nmf(V):
    """
    Run standard nonnegative matrix factorization.

    :param V: Target matrix to estimate.
    :type V: :class:`numpy.matrix`
    """
    rank = 10
    # Factorize twice: once with Euclidean updates / Frobenius objective,
    # once with divergence updates / KL objective.
    for update_rule, cost in (('euclidean', 'fro'), ('divergence', 'div')):
        model = mf.mf(V, seed="random_vcol", rank=rank, method="nmf",
                      max_iter=12, initialize_only=True,
                      update=update_rule, objective=cost)
        fitted = mf.mf_run(model)
        print_info(fitted)
def run_snmnmf(V, V1):
    """
    Run sparse network-regularized multiple NMF.

    :param V: Target matrix to estimate.
    :type V: :class:`numpy.matrix`
    """
    rank = 10
    # Random nonnegative sparse constraint matrices for the regularization terms.
    A = abs(sp.rand(V1.shape[1], V1.shape[1], density=0.7, format='csr'))
    B = abs(sp.rand(V.shape[1], V1.shape[1], density=0.7, format='csr'))
    model = mf.mf(target=(V, V1), seed="random_c", rank=rank, method="snmnmf",
                  max_iter=12, initialize_only=True, A=A, B=B,
                  gamma=0.01, gamma_1=0.01, lamb=0.01, lamb_1=0.01)
    fitted = mf.mf_run(model)
    # Print all quality measures for each target/mixture pair of the multiple NMF
    # (idx 0 = first pair, idx 1 = second pair).
    for pair_idx in (0, 1):
        print_info(fitted, idx=pair_idx)
def factorize(V):
    """
    Perform NMF - Divergence factorization on the sparse Medlars data matrix.

    Return basis and mixture matrices of the fitted factorization model.

    :param V: The Medlars data matrix.
    :type V: `scipy.sparse.csr_matrix`
    """
    # Python 3 compatible print() calls (original used Python 2 print statements).
    print("Performing NMF - Divergence factorization ...")
    model = mf.mf(V, seed="random_vcol", rank=12, method="nmf", max_iter=15,
                  initialize_only=True, update='divergence', objective='div')
    fit = mf.mf_run(model)
    print("... Finished")
    # sparseness() returns (basis sparseness, mixture sparseness).
    sparse_w, sparse_h = fit.fit.sparseness()
    print("""Stats:
    - iterations: %d
    - KL Divergence: %5.3f
    - Euclidean distance: %5.3f
    - Sparseness basis: %5.3f, mixture: %5.3f""" % (fit.fit.n_iter, fit.distance(), fit.distance(metric='euclidean'), sparse_w, sparse_h))
    return fit.basis(), fit.coef()
def factorize(V):
    """
    Perform LSNMF factorization on the ORL faces data matrix.

    Return basis and mixture matrices of the fitted factorization model.

    :param V: The ORL faces data matrix.
    :type V: `numpy.matrix`
    """
    # Python 3 compatible print() calls (original used Python 2 print statements).
    print("Performing LSNMF factorization ...")
    model = mf.mf(V, seed="random_vcol", rank=25, method="lsnmf", max_iter=50,
                  initialize_only=True, sub_iter=10, inner_sub_iter=10,
                  beta=0.1, min_residuals=1e-8)
    fit = mf.mf_run(model)
    print(" ... Finished")
    print("""Stats:
    - iterations: %d
    - final projected gradients norm: %5.3f
    - Euclidean distance: %5.3f""" % (fit.fit.n_iter, fit.distance(), fit.distance(metric='euclidean')))
    return fit.basis(), fit.coef()
def run_bd(V):
    """
    Run Bayesian decomposition.

    :param V: Target matrix to estimate.
    :type V: :class:`numpy.matrix`
    """
    rank = 10
    n_rows, n_cols = V.shape
    # All-zero matrices supplied for the bd method's prior/auxiliary parameters.
    alpha = np.mat(np.zeros((n_rows, rank)))
    beta = np.mat(np.zeros((rank, n_cols)))
    n_w = np.mat(np.zeros((rank, 1)))
    n_h = np.mat(np.zeros((rank, 1)))
    model = mf.mf(V, seed="random_c", rank=rank, method="bd",
                  max_iter=12, initialize_only=True,
                  alpha=alpha, beta=beta, theta=.0, k=.0, sigma=1.,
                  skip=100, stride=1, n_w=n_w, n_h=n_h, n_sigma=False)
    print_info(mf.mf_run(model))
def main():
    """Dump all data and the summary of a spreadsheet into JSON files."""
    if len(sys.argv) != 3:
        print("usage: %s <credentials.json> <sheetname>" % (sys.argv[0]))
        return 1
    creds, sheetname = sys.argv[1], sys.argv[2]
    # authorize returns a gspread client instance;
    # client.open returns the gspread spreadsheet instance.
    spsheet = utils.authorize(creds).open(sheetname)
    # Wrap the spreadsheet in an 'mf' object.
    mfsheet = mf(spsheet)
    # Dump all data from the spreadsheet into a file.
    mfsheet.dump(sheetname + ".json")
    # Output the summary sheet data, then dump it into a file.
    mfsheet.get_summary()
    mfsheet.dump_summary(sheetname + "-summary.json")
def run_snmf(V):
    """
    Run sparse nonnegative matrix factorization.

    :param V: Target matrix to estimate.
    :type V: :class:`numpy.matrix`
    """
    rank = 10
    # Run both variants, each with its own seeding method:
    # SNMF/R seeded by random C, SNMF/L seeded by random Vcol.
    for variant, seeding in (('r', "random_c"), ('l', "random_vcol")):
        model = mf.mf(V, seed=seeding, rank=rank, method="snmf",
                      max_iter=12, initialize_only=True, version=variant,
                      eta=1., beta=1e-4, i_conv=10, w_min_change=0)
        print_info(mf.mf_run(model))
def run_nsnmf(V):
    """
    Run nonsmooth nonnegative matrix factorization.

    :param V: Target matrix to estimate.
    :type V: :class:`numpy.matrix`
    """
    rank = 10
    model = mf.mf(V,
                  seed="random",
                  rank=rank,
                  method="nsnmf",
                  max_iter=12,
                  initialize_only=True,
                  theta=0.5)
    fitted = mf.mf_run(model)
    print_info(fitted)
def run_pmf(V):
    """
    Run probabilistic matrix factorization.

    :param V: Target matrix to estimate.
    :type V: :class:`numpy.matrix`
    """
    rank = 10
    model = mf.mf(V,
                  seed="random_vcol",
                  rank=rank,
                  method="pmf",
                  max_iter=12,
                  initialize_only=True,
                  rel_error=1e-5)
    fitted = mf.mf_run(model)
    print_info(fitted)
def run_psmf(V):
    """
    Run probabilistic sparse matrix factorization.

    :param V: Target matrix to estimate.
    :type V: :class:`numpy.matrix`
    """
    rank = 10
    rng = np.random.RandomState()
    # Uniform prior over [0, 1) of length 10 for the psmf method.
    prior = rng.uniform(low=0., high=1., size=10)
    model = mf.mf(V, seed=None, rank=rank, method="psmf",
                  max_iter=12, initialize_only=True, prior=prior)
    print_info(mf.mf_run(model))
def run_bmf(V):
    """
    Run binary matrix factorization.

    :param V: Target matrix to estimate.
    :type V: :class:`numpy.matrix`
    """
    rank = 10
    model = mf.mf(V,
                  seed="random_vcol",
                  rank=rank,
                  method="bmf",
                  max_iter=12,
                  initialize_only=True,
                  lambda_w=1.1,
                  lambda_h=1.1)
    fitted = mf.mf_run(model)
    print_info(fitted)
def run_lfnmf(V):
    """
    Run local fisher nonnegative matrix factorization.

    :param V: Target matrix to estimate.
    :type V: :class:`numpy.matrix`
    """
    rank = 10
    rng = np.random.RandomState()
    # Explicit random nonnegative initial factors (seed=None disables seeding).
    W0 = abs(rng.randn(V.shape[0], rank))
    H0 = abs(rng.randn(rank, V.shape[1]))
    model = mf.mf(V, seed=None, W=W0, H=H0, rank=rank, method="lfnmf",
                  max_iter=12, initialize_only=True, alpha=0.01)
    print_info(mf.mf_run(model))
def run_lsnmf(V):
    """
    Run least squares nonnegative matrix factorization.

    :param V: Target matrix to estimate.
    :type V: :class:`numpy.matrix`
    """
    rank = 10
    model = mf.mf(V,
                  seed="random_vcol",
                  rank=rank,
                  method="lsnmf",
                  max_iter=12,
                  initialize_only=True,
                  sub_iter=10,
                  inner_sub_iter=10,
                  beta=0.1,
                  min_residuals=1e-5)
    fitted = mf.mf_run(model)
    print_info(fitted)
def run_one(V, rank):
    """
    Run standard NMF on leukemia data set.

    50 runs of Standard NMF are performed and the obtained consensus matrix
    averages all 50 connectivity matrices.

    :param V: Target matrix with gene expression data.
    :type V: `numpy.matrix` (of course it could be any format of scipy.sparse, but we will use numpy here)
    :param rank: Factorization rank.
    :type rank: `int`
    """
    # Python 3 compatible print()/range() (original used Python 2 print/xrange).
    print("================= Rank = %d =================" % rank)
    consensus = np.mat(np.zeros((V.shape[1], V.shape[1])))
    for i in range(50):
        # Standard NMF with Euclidean update equations is used. For initialization random Vcol method is used.
        # Objective function is the number of consecutive iterations in which the connectivity matrix has not changed.
        # We demand that factorization does not terminate before 40 consecutive iterations in which the connectivity
        # matrix does not change (conn_change = 40; the original comment said 30, which disagreed with the code).
        # For a backup we also specify the maximum number of iterations. Note that the satisfiability of one
        # stopping criterion terminates the run (there is no chance for divergence).
        model = mf.mf(V, method="nmf", rank=rank, seed="random_vcol", max_iter=200,
                      update='euclidean', objective='conn', conn_change=40,
                      initialize_only=True)
        fit = mf.mf_run(model)
        print("%2d / 50 :: %s - init: %s ran with ... %3d / 200 iters ..." % (i + 1, fit.fit, fit.fit.seed, fit.fit.n_iter))
        # Compute connectivity matrix of the factorization.
        # Again, we could use the multiple-runs support of the MF library, track the factorization model
        # across 50 runs and then just call fit.consensus().
        consensus += fit.fit.connectivity()
    # Average the 50 connectivity matrices.
    consensus /= 50.
    # Reorder the consensus matrix and plot it.
    p_consensus = reorder(consensus)
    plot(p_consensus, rank)
def run_icm(V):
    """
    Run iterated conditional modes.

    :param V: Target matrix to estimate.
    :type V: :class:`numpy.matrix`
    """
    rank = 10
    rng = np.random.RandomState()
    # Random Gaussian matrices supplied as the icm method's alpha/beta parameters.
    alpha = rng.randn(V.shape[0], rank)
    beta = rng.randn(rank, V.shape[1])
    model = mf.mf(V, seed="nndsvd", rank=rank, method="icm",
                  max_iter=12, initialize_only=True, iiter=20,
                  alpha=alpha, beta=beta, theta=0., k=0., sigma=1.)
    print_info(mf.mf_run(model))
def main():
    """Log in with a fresh mf session and load the portfolio."""
    session = mf()
    browser = session.login()
    session.loadPortfolio(browser)
def main():
    """Log in with a fresh mf session, load the portfolio, and return the file path."""
    session = mf()
    browser = session.login()
    return session.loadPortfolio(browser)
from scipy.sparse import csr_matrix
# Import array from numpy directly: `from scipy import array` relied on scipy's
# re-export of numpy names, which has been removed from modern scipy.
from numpy import array, dot

# Build a tiny 3x3 sparse target matrix in CSR form (data, indices, indptr).
V = csr_matrix((array([1, 2, 3, 4, 5, 6]), array([0, 2, 2, 0, 1, 2]), array([0, 2, 3, 6])), shape=(3, 3))

# Print this tiny matrix in dense format.
# (Python 3 compatible print() calls throughout; original used Python 2 print statements.)
print(V.todense())

# Run Standard NMF rank 4 algorithm.
# Update equations and cost function are Standard NMF specific parameters (among others).
# If not specified, the Euclidean update and Frobenius cost function would be used.
# We don't specify an initialization method. Algorithm specific or random initialization will be used.
# In Standard NMF case, by default random is used.
# Returned object is fitted factorization model. Through it user can access quality and performance measures.
# The fit's attribute `fit` contains all the attributes of the factorization.
fit = mf.mf(V, method="nmf", max_iter=30, rank=4, update='divergence', objective='div')

# Basis matrix. It is sparse, as input V was sparse as well.
W = fit.basis()
print("Basis matrix")
print(W.todense())

# Mixture matrix. We print this tiny matrix in dense format.
H = fit.coef()
print("Coef")
print(H.todense())

# Return the loss function according to Kullback-Leibler divergence. By default Euclidean metric is used.
print("Distance Kullback-Leibler", fit.distance(metric="kl"))

# Compute generic set of measures to evaluate the quality of the factorization