def run_nmf(V, rank=12, max_iter=5000):
    """
    Factorize the target matrix with standard NMF twice: once using the
    Euclidean update rule (Frobenius objective) and once using the
    divergence update rule (KL objective), reporting each fit.

    :param V: Target matrix to estimate.
    :type V: :class:`numpy.matrix`
    :param rank: Factorization rank.
    :param max_iter: Maximum number of update iterations per run.
    """
    # Euclidean update / Frobenius objective.
    euclidean_model = nimfa.Nmf(V,
                                seed="random_vcol",
                                rank=rank,
                                max_iter=max_iter,
                                update='euclidean',
                                objective='fro')
    print_info(euclidean_model())

    # Divergence update / KL objective.
    divergence_model = nimfa.Nmf(V,
                                 seed="random_vcol",
                                 rank=rank,
                                 max_iter=max_iter,
                                 initialize_only=True,
                                 update='divergence',
                                 objective='div')
    return print_info(divergence_model())
示例#2
0
def generateRssPlot(spectra_data):
    """
    Plot, across a range of factorization ranks, the ratio between the RSS
    of NMF on the original spectra and the RSS on a column-permuted copy
    (a null model), then save the figure.
    """
    original = np.transpose(spectra_data.values)
    # Shuffle each column independently to build the null-model matrix.
    shuffled = permuteColumns(original)

    # Candidate ranks 30, 32, ..., 98 — adjust to test other k-values.
    ranks = np.arange(30, 100, 2)
    ratios = []

    # Fit NMF at every rank on both matrices and record the RSS ratio.
    for rank in ranks:
        original_model = nimfa.Nmf(original, rank=rank)
        original_model()
        original_rss = original_model.rss()
        shuffled_model = nimfa.Nmf(shuffled, rank=rank)
        shuffled_model()
        shuffled_rss = shuffled_model.rss()
        ratios.append(original_rss / shuffled_rss)

    print(ranks)
    print(ratios)

    ax = graphSetup("MassSpectra RSS Plot", "K Value (# of Basis Vectors)",
                    "Ratio Between RSS Value",
                    [np.min(ranks), np.max(ranks)],
                    [int(np.min(ratios)), mt.ceil(np.max(ratios))])

    ax.plot(ranks, ratios)

    savePlot()
示例#3
0
def NMFRun(M_run, args, projdir, samples, subtypes_dict):
    """
    Run NMF on M_run, either at a fixed rank (args.rank > 0) or by searching
    ranks 1-5 for the point where explained variance stops improving
    (args.rank == 0).

    :param M_run: input matrix to factorize
    :param args: namespace with .rank and .verbose attributes
    :param projdir, samples, subtypes_dict: unused here; kept for interface
        compatibility with callers
    :returns: namedtuple Out(W, H) with basis and coefficient matrices
    """
    if args.rank > 0:
        if args.verbose:
            eprint("Running NMF with rank =", args.rank)

        model = nimfa.Nmf(M_run,
            rank=args.rank,
            update="divergence",
            objective='div',
            n_run=1,
            max_iter=200)
        model_fit = model()
        evar = model_fit.fit.evar()
        maxind = args.rank

    elif args.rank == 0:
        if args.verbose:
            eprint("Finding optimal rank for NMF...")
        evarprev = 0
        for i in range(1, 6):
            model = nimfa.Nmf(M_run,
                rank=i,
                update="divergence",
                objective='div',
                n_run=1,
                max_iter=200)
            model_fit = model()
            evar = model_fit.fit.evar()
            if args.verbose:
                eprint("Explained variance for rank " + str(i) + ":", evar)
            # Stop once the gain in explained variance becomes negligible.
            if i > 2 and evar - evarprev < 0.001:
                if args.verbose:
                    eprint(textwrap.dedent("""\
                            Stopping condition met: <0.1 percent difference
                            in explained variation between ranks
                            """))
                # BUG FIX: this refit at rank i-1 was previously nested
                # inside `if args.verbose:`, so non-verbose runs returned
                # the rank-i fit instead of the selected rank i-1 fit.
                model = nimfa.Nmf(M_run,
                    rank=i-1,
                    update="divergence",
                    objective='div',
                    n_run=1,
                    max_iter=200)
                model_fit = model()
                break
            evarprev = evar

    W = model_fit.basis()
    H = model_fit.coef()

    out = collections.namedtuple('Out', ['W', 'H'])(W, H)
    return out
示例#4
0
def extract(genomes, totalIterationsPerCore, numberSignaturesToExtract,
            WPerCore, HPerCore, genomeErrorsPerCore,
            genomesReconstructedPerCore):
    """
    Bootstrap the genome matrix and run NMF totalIterationsPerCore times,
    storing the column-normalized factors and per-iteration reconstruction
    errors into the caller-provided output arrays (filled in place).
    """
    # BUG FIX: dimensions were read from an undefined name `data`;
    # they must come from the `genomes` argument.
    totalMutationTypes = size(genomes, 0)
    totalGenomes = size(genomes, 1)
    processCount = 0

    for i in range(totalIterationsPerCore):
        # Replace zeros with a tiny epsilon to avoid underflow in NMF.
        bootstrapGenomes = numpy.maximum(bootstrapCancerGenomes(genomes),
                                         numpy.finfo(numpy.float32).eps)
        nmf = nimfa.Nmf(bootstrapGenomes,
                        max_iter=MAX_ITER,
                        rank=numberSignaturesToExtract,
                        update=UPDATE_EQUATION,
                        objective=OBJECTIVE_FUNC,
                        conn_change=CONN_CHANGE,
                        test_conv=TEST_CONV)  # max iter is actual 1 mill
        nmf_fit = nmf()
        # basis()/coef() return the model's factor matrices; cache them so
        # in-place normalization below is applied once per object.
        W = nmf_fit.basis()
        H = nmf_fit.coef()
        # Normalize each signature: basis column sums to 1, exposure row is
        # scaled up by the same factor so the product W * H is unchanged.
        for j in range(numberSignaturesToExtract):
            total = sum(W[:, j])
            W[:, j] = W[:, j] / total
            H[j, :] = H[j, :] / total

        genomeErrorsPerCore[:, :, i] = bootstrapGenomes - W * H
        genomesReconstructedPerCore[:, :, i] = W * H
        WPerCore[:,
                 processCount:(processCount +
                               numberSignaturesToExtract)] = W
        HPerCore[processCount:(processCount +
                               numberSignaturesToExtract), :] = H
        processCount = processCount + numberSignaturesToExtract
示例#5
0
def one_run(iterador, df, rank, type_nmf):
    """
    Fit one NMF variant ("nmf", "snmf" or "nsnmf") per element of
    `iterador` on the transposed data frame, collecting fit summaries and
    the normalized basis/coefficient matrices into a dict keyed by run id.
    """
    V = np.transpose(np.array(df))
    results = {}
    for run_id in iterador:
        entry = {}
        if type_nmf == "nmf":
            model = nimfa.Nmf(V, rank=rank, seed="random_vcol",
                              max_iter=1000000, update='divergence',
                              objective='conn', conn_change=40)
        if type_nmf == "snmf":
            model = nimfa.Snmf(V, rank=rank, seed="random_vcol",
                               max_iter=1000000, conn_change=40, version='l')
        if type_nmf == "nsnmf":
            model = nimfa.Nsnmf(V, rank=rank, seed="random_vcol",
                                max_iter=1000000, objective='conn',
                                conn_change=40)
        fit = model()
        full_summary = fit.summary()
        # Keep only the metrics of interest from the full summary.
        summary = {key: full_summary[key]
                   for key in ('connectivity', 'euclidean', 'evar',
                               'kl', 'rss', 'sparseness')}
        entry['summary'] = summary
        entry['n_iter'] = fit.n_iter
        entry['distance'] = fit.distance
        basis_frame = pd.DataFrame(fit.basis())
        norm = extract_norm(basis_frame)
        entry['basis'] = norm['P']
        entry['coef'] = pd.DataFrame(norm['R'] * fit.coef())
        results[run_id] = entry
    return results
def factorize(V):
    """
    Run divergence-based NMF on the sparse Medlars data matrix and return
    the basis (W) and mixture (H) matrices of the fitted model.

    :param V: The Medlars data matrix.
    :type V: `scipy.sparse.csr_matrix`
    """
    model = nimfa.Nmf(V,
                      seed="random_vcol",
                      rank=12,
                      max_iter=15,
                      update="divergence",
                      objective="div")
    print("Algorithm: %s\nInitialization: %s\nRank: %d" %
          (model, model.seed, model.rank))
    fit = model()
    basis_sparseness, mixture_sparseness = fit.fit.sparseness()
    print("""Stats:
            - iterations: %d
            - KL Divergence: %5.3f
            - Euclidean distance: %5.3f
            - Sparseness basis: %5.3f, mixture: %5.3f""" %
          (fit.fit.n_iter, fit.distance(), fit.distance(metric='euclidean'),
           basis_sparseness, mixture_sparseness))
    return fit.basis(), fit.coef()
示例#7
0
def nnmf(X0, k):
    """Factorize X0 with rank-k regularized NMF and return the fitted
    (reconstructed) matrix."""
    model = nimfa.Nmf(X0, rank=k, max_iter=10, lambda_w=0.8, lambda_h=0.8)
    fit = model()
    return fit.fitted()
示例#8
0
def pick_rank_vis(data, max_factor, title):
    """
    Estimate NMF rank quality over [2, max_factor) and plot the cophenetic
    correlation coefficient and RSS against rank on twin y-axes, saving the
    figure to disk.
    """
    V = data.T
    nmf = nimfa.Nmf(V, max_iter=1000000, update='euclidean', rank=2,
                    track_error=True)
    r = nmf.estimate_rank(rank_range=range(2, max_factor))

    result_array = []
    for rank, vals in r.items():
        result_array.append([rank, vals['rss'], vals['cophenetic']])
    df = pd.DataFrame(result_array, columns=['rank', 'rss', 'coph'])

    fig, ax1 = plt.subplots()
    plt.xlabel('Number of Kmer signatures')
    ax2 = ax1.twinx()
    ax1.set_ylabel('Cophenetic correlation coefficient', color='lightsalmon')
    ax2.set_ylabel('RSS', color='cadetblue')

    # BUG FIX: the original redrew the identical full series once per row
    # of df inside an iterrows() loop; a single plot call per axis is
    # equivalent and avoids the redundant overdraw.
    ax1.plot(df['rank'], df['coph'], color='lightsalmon')
    ax2.plot(df['rank'], df['rss'], color='cadetblue')

    plt.savefig("/pollard/home/abustion/deep_learning_microbiome/analysis/NMF/alexandrov" +
                str(title) +
                "_011418.png", bbox_inches='tight', dpi=300)
def alexandrov(data, max_factor, title):
    """
    For each candidate rank in [2, max_factor), run consensus NMF (50 runs)
    and scatter the cophenetic stability and RSS reconstruction error on
    twin y-axes; save the resulting figure.
    """
    fig, ax1 = plt.subplots()
    plt.xlabel('Number of Kmer signatures')
    ax2 = ax1.twinx()
    ax1.set_ylabel('stability', color='red')
    ax2.set_ylabel('reconstruction error', color='blue')

    for rank in range(2, max_factor):
        model = nimfa.Nmf(data.T,
                          rank=rank,
                          max_iter=1000,
                          n_run=50,
                          track_factor=True)
        fit = model()

        summary = fit.summary()
        ax1.scatter(rank, summary['cophenetic'], color='r')
        ax2.scatter(rank, summary['rss'], color='b')

    plt.savefig("/pollard/home/abustion/deep_learning_microbiome/analysis/NMF/alexandrov" +
                str(title) +
                ".png")
示例#10
0
def RunTumor(datasetname, tumorname, data, mink, maxk, num_iterations, init):
    """
    Sweep ranks [mink, maxk): compute an average consensus matrix per rank,
    record its cophenetic correlation, pick the most stable rank, then fit
    a final NMF at that rank and emit the heat plot.
    """
    k_cophs = {}

    # BUG FIX: DataFrame.as_matrix() was removed from pandas (>=1.0);
    # .values is the equivalent accessor (already used elsewhere in this
    # file).
    data = data.values
    data = np.transpose(data)

    for k in range(mink, maxk):
        mat = ComputeAverageConsensusMatrix(data, k, num_iterations, init)
        mat = reorder(mat)
        A = np.asarray(mat)

        k_cophs[k] = coph_cor(A)
        savematrixplot(datasetname, tumorname, A, k)

    print(tumorname, "cophs:", k_cophs)
    savecophcorplot(datasetname, tumorname, k_cophs)
    rank = evaluateStability(k_cophs)
    nmf = nimfa.Nmf(data,
                    rank=rank,
                    seed="random_vcol",
                    max_iter=200,
                    update='euclidean',
                    objective='conn',
                    conn_change=40)
    nmf_fit = nmf()
    generateHeatPlot(datasetname, tumorname, '', data, nmf_fit.basis(),
                     nmf_fit.coef())
示例#11
0
def run_one(V, rank):
    """
    Run standard NMF on the leukemia data set: 50 runs of Standard NMF
    whose connectivity matrices are averaged into a consensus matrix,
    which is then reordered and plotted.

    :param V: Target matrix with gene expression data.
    :type V: `numpy.ndarray`
    :param rank: Factorization rank.
    :type rank: `int`
    """
    print("================= Rank = %d =================" % rank)
    n_samples = V.shape[1]
    consensus = np.zeros((n_samples, n_samples))
    for run in range(50):
        model = nimfa.Nmf(V,
                          rank=rank,
                          seed="random_vcol",
                          max_iter=200,
                          update='euclidean',
                          objective='conn',
                          conn_change=40)
        fit = model()
        print("%2d/50 : %s - init: %s (%3d/200 iterations)" %
              (run + 1, fit.fit, fit.fit.seed, fit.fit.n_iter))
        consensus += fit.fit.connectivity()
    consensus /= 50.
    p_consensus = reorder(consensus)
    plot(p_consensus, rank)
示例#12
0
 def mf(self,k,max_iter,alpha):
     # NOTE(review): Python 2 code (print statements, cPickle, and a
     # list-returning map()); do not run under Python 3 without porting.
     # Fits H by a clamped, step-halving gradient descent against the word
     # embedding matrix W, runs rank-k NMF on H^T, and pickles the
     # reconstructed word vectors for words in the ESA similarity vocab.
     global matrix
     global W
     # One embedding row per word in self.wordList.
     vectors = [list(self.model[word]) for word in self.wordList]
     W = np.array(vectors,dtype = np.float32)
     del vectors
     # H: one row per row of self.matrix, one column per article.
     H = np.transpose(np.random.rand(W.shape[1],len(articleDict)))
     for i in range(H.shape[0]):
         rate = 1
         v = self.matrix.getrow(i).toarray()
         for j in range(max_iter):
             # Gradient step on ||W.H[i] - v||, with step size halved each
             # iteration; clamp to keep H nonnegative.
             g = W.dot(H[i]) - v
             H[i] -= rate * alpha * np.transpose(W).dot(np.array(g[0]))
             H[i] = np.array(map((lambda x:max(x,0)),H[i]))
             rate *= 0.5
     print "Begin NMF"
     nmf = nimfa.Nmf(sparse.csr_matrix(np.transpose(H)),seed = "random_vcol",rank = k,max_iter = 10)
     nmf_fit = nmf()
     H1 = nmf_fit.basis()
     H2 = nmf_fit.coef()
     print "NMF ok"
     # Reconstruct word vectors in the latent space: W . H1.
     construct = W.dot(H1.toarray())
     wordVectors = {}
     myESA = ESA.ESA()
     wordsim = myESA.getWordSim()
     # Keep only words present in the ESA word-similarity vocabulary.
     for i in range(len(construct)):
         if self.wordList[i] in wordsim:
             wordVectors[self.wordList[i]] = construct[i]
     # NOTE(review): columnNum is not defined in this method — presumably a
     # module-level global; confirm before reuse.
     cPickle.dump([columnNum,wordVectors],open("data/mc_matrix",'wb'))
def run_one(V, rank):
    """
    Run standard NMF on the medulloblastoma data set: 50 runs of Standard
    NMF whose connectivity matrices are averaged into a consensus matrix,
    which is then reordered and plotted.

    :param V: Target matrix with gene expression data.
    :type V: `numpy.ndarray`
    :param rank: Factorization rank.
    :type rank: `int`
    """
    print("================= Rank = %d =================" % rank)
    dim = V.shape[1]
    consensus = np.zeros((dim, dim))
    for _ in range(50):
        model = nimfa.Nmf(V,
                          rank=rank,
                          seed="random_vcol",
                          max_iter=200,
                          update='euclidean',
                          objective='conn',
                          conn_change=40)
        fit = model()
        print("Algorithm: %s\nInitialization: %s\nRank: %d" %
              (model, model.seed, model.rank))
        consensus += fit.fit.connectivity()
    consensus /= 50.
    plot(reorder(consensus), rank)
示例#14
0
 def create_nmf_summary(self, data, ranks, n_runs):
     """
     Estimate NMF rank quality over `ranks` with `n_runs` runs each;
     cache the nimfa summary dict on self and return it.
     """
     if ranks is None or n_runs is None:
         raise ValueError(
             "Either ranks or n_runs is empty. Recreate the NMFCC class with these parameters inputted."
         )
     model = nimfa.Nmf(data, seed="random_vcol")
     summary = model.estimate_rank(rank_range=ranks, n_run=n_runs, what='all')
     self.summary = summary
     return summary
def _nmf(E, k):
    """
    Rank-k NMF of the data frame E; returns the transposed dense H matrix.
    """
    smallest = E.min().min()
    # NMF requires nonnegative input; shift by the global minimum if needed.
    V = E.values - smallest if smallest < 0 else E.values

    model = nimfa.Nmf(V, rank=int(k), seed="random_vcol", max_iter=20000,
                      update='euclidean')
    fit = model()

    return fit.fit.H.A.T
示例#16
0
def NMFfeatures_helper(h):
    """
    Load the hour-h CA taxi flow matrix, factorize it at rank 4, and return
    the concatenated (77, 8) source/destination latent features.
    """
    flow = np.loadtxt("../miscs/{0}/taxi-CA-h{1}.matrix".format(year, h), delimiter=" ")
    rows, cols = flow.shape
    assert rows == cols and rows == 77

    model = nimfa.Nmf(flow, rank=4, max_iter=100) #, update="divergence", objective="conn", conn_change=50)
    fit = model()
    src = fit.basis()
    dst = fit.coef()
    features = np.concatenate((src, dst.T), axis=1)
    assert features.shape == (77, 8)
    return features
示例#17
0
def NMFfeatures():
    """
    Factorize the taxi flow matrix at rank 4 (divergence update,
    connectivity objective) and return the concatenated source/destination
    latent features.
    """
    flow = np.loadtxt("../miscs/taxiFlow.csv", delimiter=",")
    model = nimfa.Nmf(flow,
                      rank=4,
                      max_iter=30,
                      update="divergence",
                      objective="conn",
                      conn_change=50)
    fit = model()
    return np.concatenate((fit.basis(), fit.coef().T), axis=1)
def nmf_library(V, W_init, correct_H):
    """
    Compare five nimfa factorization algorithms (Lsnmf, Nmf, Icm, Bd, Pmf)
    on V, all started from the same W_init and a zero H, and return the
    mean absolute error of each normalized H against correct_H.

    :returns: [lsnmf_error, nmf_error, icm_error, bd_error, pmf_error]
    """
    # The five constructions, fits and error computations were byte-for-byte
    # duplicates; collapse them into one loop over the algorithm classes.
    # nimfa.Lfnmf remains intentionally excluded (as in the original).
    algorithms = [nimfa.Lsnmf, nimfa.Nmf, nimfa.Icm, nimfa.Bd, nimfa.Pmf]
    errors = []
    for algorithm in algorithms:
        model = algorithm(V,
                          seed=None,
                          rank=3,
                          max_iter=100,
                          H=np.array([0., 0., 0.]).reshape(-1, 1),
                          W=W_init)
        model()  # fit in place; H is read back from the model object below
        errors.append(mean_absolute_error(
            correct_H, normalized(np.array(model.H).reshape(-1, ))))
    return errors
示例#19
0
def do_nmf(V):
    """
    Fit a rank-10 NMF on V, print the factor matrices, then estimate
    factorization quality over several candidate ranks.
    """
    model = nimfa.Nmf(V, seed='random_vcol', rank=10, max_iter=100)
    fit = model()

    print('Basis matrix:\n%s' % fit.basis())

    print('Mixture matrix:\n%s' % fit.coef())

    print("starting")
    estimates = model.estimate_rank(rank_range=[10, 15, 20, 25], what='all')
    print('Rank estimate:\n%s' % estimates)
示例#20
0
    def factorization(self, cv_results_file):
        """
        Matrix factorization: fits NMF on the user-ratings matrix and stores
        the reconstruction in self.predictions, the train/test mask in
        self.mask and the true ratings in self.true_values.

        :param cv_results_file: file for saving cv scores (currently unused;
            retained for interface compatibility with the CV code path)
        """
        print('\nDfmf')
        mask = self.split_train_test(self.users_ratings, 0.2)
        R12 = self.users_ratings

        # Rank previously chosen by cross-validation over [2, 4, ..., 12];
        # unused locals and commented-out CV code removed.
        print('\nParameters\n')
        best_p_t1 = 10

        V = spr.csr_matrix(R12)

        nmf = nimfa.Nmf(V,
                        max_iter=200,
                        rank=best_p_t1,
                        update='euclidean',
                        objective='fro')
        nmf_fit = nmf()

        W = nmf_fit.basis()

        H = nmf_fit.coef()

        R12_pred = np.dot(W, H)

        self.predictions = R12_pred
        self.mask = mask
        self.true_values = R12
示例#21
0
    def calculate_nmf_error(self, mixture, n_bases, dist_type, iterations,
                            attempts, seed):
        """
        Run nussl's NMF and nimfa's NMF (initialized from nussl's factors)
        on the same mixture `attempts` times and raise when nussl's
        worst-case relative reconstruction error is meaningfully larger
        than nimfa's.
        """
        kl = nussl.transformers.TransformerNMF.KL_DIVERGENCE
        nimfa_update = 'divergence' if dist_type == kl else dist_type

        for _ in range(attempts):
            # nussl NMF
            nussl_nmf = nussl.TransformerNMF(mixture,
                                             n_bases,
                                             max_num_iterations=iterations,
                                             distance_measure=dist_type,
                                             seed=seed)
            nussl_nmf.transform()

            # nimfa NMF, seeded with nussl's factor matrices so both
            # algorithms start from the same point.
            nimfa_nmf = nimfa.Nmf(mixture,
                                  max_iter=iterations,
                                  rank=n_bases,
                                  update=nimfa_update,
                                  W=nussl_nmf.template_dictionary,
                                  H=nussl_nmf.activation_matrix)
            nmf_fit = nimfa_nmf()

            # Reconstructions from both factorizations.
            nimfa_est = np.dot(nmf_fit.basis(), nmf_fit.coef())
            nussl_est = np.dot(nussl_nmf.template_dictionary,
                               nussl_nmf.activation_matrix)

            # Worst-case relative reconstruction errors.
            max_nussl_error = np.max(np.abs(nussl_est - mixture) / mixture)
            max_nimfa_error = np.max(np.abs(nimfa_est - mixture) / mixture)
            max_diff = max_nussl_error - max_nimfa_error

            # Fail only when nussl is worse than nimfa AND worse than the
            # allowed tolerance, AND the gap itself exceeds the tolerance —
            # i.e. nussl's results must stay close to nimfa's.
            if (max_nussl_error > max_nimfa_error
                    and max_nussl_error > self.max_error_pct
                    and max_diff > self.max_error_pct):
                raise Exception(
                    'max nussl error is larger than nimfa and self.max_error_pct'
                )
示例#22
0
def _nmf(E, k):
    """
    Rank-k NMF of the data frame E; shifts the data to be nonnegative when
    it contains negative values, prints the model RSS and returns the
    transposed dense H matrix.
    """
    # BUG FIX: DataFrame.as_matrix() was removed from pandas (>=1.0); use
    # .values, matching the other _nmf variant in this file.
    if E.min().min() < 0:
        V = E.values - E.min().min()
    else:
        V = E.values

    nmf = nimfa.Nmf(V,
                    rank=int(k),
                    seed="random_vcol",
                    max_iter=20000,
                    update='euclidean')
    fit = nmf()

    print(nmf.rss())

    return fit.fit.H.A.T
示例#23
0
 def NMFmod(self, rank):
     """
     Build (but do not run) a divergence NMF model for self.M_run with
     deterministic W/H initialization derived from self.seed.
     """
     prng = np.random.RandomState(self.seed)
     W_init = prng.rand(self.M_run.shape[0], rank)
     H_init = prng.rand(rank, self.M_run.shape[1])

     return nimfa.Nmf(self.M_run,
                      rank=rank,
                      H=H_init,
                      W=W_init,
                      update="divergence",
                      objective='div',
                      n_run=1,
                      max_iter=200)
示例#24
0
def GetLatentSpace(train, rank=10):
    """
    Factorize the sparse train matrix with rank-`rank` NMF and return the
    row-normalized latent-space matrices.

    :param train: sparse matrix (must support .todense())
    :param rank: factorization rank
    :returns: (M_normalized, N_normalized) — coefficient-side (customer)
        and basis-side (ticker) latent vectors, each row scaled to unit
        L2 norm.
    """
    model = nimfa.Nmf(train.todense(),
                      seed='random_vcol',
                      rank=rank,
                      max_iter=100)
    mfit = model()
    M = np.array(mfit.coef())
    N = np.array(mfit.basis())

    # Customer latent vectors: one row per customer, unit-normalized.
    M = M.T
    M_normalized = M / np.linalg.norm(M, axis=1)[:, np.newaxis]

    # Ticker latent vectors, unit-normalized. (Removed the no-op `N = N`.)
    N_normalized = N / np.linalg.norm(N, axis=1)[:, np.newaxis]

    return M_normalized, N_normalized
示例#25
0
def NMFfeatures(h):
    # NOTE(review): Python 2 code (print statement); port before running
    # under Python 3.
    # Loads the hour-h taxi flow vectors and matrix, restricts the matrix
    # to the regions present in the .vec file, factorizes it at rank 10
    # and returns (latent features, sorted region ids).
    t = np.genfromtxt("../miscs/{0}/taxi-h{1}.vec".format(year, h), delimiter=" ", skip_header=1)
    # First column holds region ids; sort them for searchsorted below.
    tid = t[:,0]
    l = len(tid)
    tid = tid.astype(int)
    tid.sort()
    # Positions of the present region ids within the global sortedId list
    # (assumes every tid occurs in sortedId — TODO confirm).
    idx = np.searchsorted(sortedId, tid)
    print "@hour {0}, #regions {1}".format(h, len(idx))
    
    f = np.loadtxt("../miscs/{0}/taxi-h{1}.matrix".format(year, h), delimiter=",")
    # Keep only rows/columns of regions that appear in the .vec file.
    fp = f[idx,:]
    fp = fp[:, idx]
    assert fp.shape==(l, l)
    
    nmf = nimfa.Nmf(fp, rank=10, max_iter=30, update="divergence", objective="conn", conn_change=50)
    nmf_fit = nmf()
    src = nmf_fit.basis()
    dst = nmf_fit.coef()
    
    # Source and destination latent features side by side.
    return np.concatenate( (src, dst.T), axis=1 ), tid
示例#26
0
def NMF(V, rank):
    """
    Decompose the target matrix V with divergence-based NMF.

    :param V: the target matrix to decompose.
    :param rank: factorization rank.
    :returns: dict with basis matrix W, mixture matrix H, effective rank K
        and the final KL divergence KLD.
    """
    model = nimfa.Nmf(V,
                      seed="random_c",
                      rank=rank,
                      n_run=1,
                      max_iter=2000,
                      update='divergence',
                      objective='div')
    fit = model()
    basis = fit.basis()
    print('Stop at iteration #: %d' % fit.summary()["n_iter"])
    return {
        "W": basis,
        "H": fit.coef(),
        "K": basis.shape[1],
        "KLD": fit.distance(metric='kl')
    }
示例#27
0
    def run(self, output_file):
        # NOTE(review): Python 2 code (print statements); port before
        # running under Python 3.
        # Fits the configured factorization model on self.mat, then rewrites
        # output_file in place: each input line "<row> <col> ..." becomes
        # "<row> <col> <fitted value>".
        print "Running non-negative MF....", strftime(
            "%Y-%m-%d %H:%M:%S", gmtime())
        # Dispatch on the configured method name; unknown methods abort.
        if self.method == 'nmf':
            modelnmf = nimfa.Nmf(self.mat, rank=self.rank, max_iter=self.iter)
        elif self.method == "lfnmf":
            modelnmf = nimfa.Lfnmf(self.mat, rank=self.rank, max_iter=self.iter)
        elif self.method == "nsnmf":
            modelnmf = nimfa.Nsnmf(self.mat, rank=self.rank, max_iter=self.iter)
        elif self.method == "pmf":
            modelnmf = nimfa.Pmf(self.mat, rank=self.rank, max_iter=self.iter)
        elif self.method == "psmf":
            modelnmf = nimfa.Psmf(self.mat, rank=self.rank, max_iter=self.iter)
        elif self.method == "snmf":
            modelnmf = nimfa.Snmf(self.mat, rank=self.rank, max_iter=self.iter)
        elif self.method == "sepnmf":
            modelnmf = nimfa.Sepnmf(self.mat, rank=self.rank, max_iter=self.iter)
        else:
            print "No model is being recognized, stopped."
            sys.exit(1)

        # Dense reconstruction of the fitted factorization.
        model = modelnmf()
        self.result = np.array(model.fitted())
        print "Done MF!", strftime("%Y-%m-%d %H:%M:%S", gmtime())


        print "Write results to file.", strftime("%Y-%m-%d %H:%M:%S", gmtime())
        # Read all existing lines, then truncate and rewrite each one with
        # the fitted value appended for its (row, col) pair.
        with open(output_file, "r+") as file:
            query = file.readlines()
            file.seek(0)
            file.truncate()

            for line in query:
                # NOTE(review): `list` shadows the builtin; left unchanged.
                list = line.split()
                newline = "%s %s %f\n" % (
                    list[0], list[1],
                    self.result[int(list[0])][int(list[1])]
                )
                file.write(newline)
    def produce(self, inputs):
        """
        Run NMF on `inputs` with this transformer's stored seed and factor
        matrices, returning a DataFrame of the row latent vectors (W)
        concatenated with the column latent vectors (H).
        """
        # nimfa emits noisy warnings; silence them for the fit.
        warnings.filterwarnings("ignore")
        model = nimfa.Nmf(V=numpy.array(inputs.values),
                          seed=self._seed,
                          W=self._W[0],
                          H=self._H[0],
                          rank=self._rank,
                          update=self._update,
                          objective=self._objective,
                          min_residuals=self._learning_rate)
        fit = model()
        basis = fit.basis()
        coef = fit.coef()

        basis_columns = [
            'row_latent_vector_' + str(i) for i in range(self._rank)
        ]
        basis_frame = pd.DataFrame(data=basis, columns=basis_columns)

        #TODO: Column latent vector
        coef_columns = [
            'column_latent_vector_' + str(i) for i in range(inputs.shape[1])
        ]
        coef_frame = pd.DataFrame(data=coef, columns=coef_columns)

        basis_frame.reset_index(drop=True, inplace=True)
        coef_frame.reset_index(drop=True, inplace=True)
        return pd.concat([basis_frame, coef_frame], axis=1)
示例#29
0
    def getNmf(cls, path="../../data/intern_samplelog.csv"):
        """
        Load the sample-log CSV, strip textual prefixes from the site/user
        columns, drop unused columns, fit a rank-20 NMF, print fit
        diagnostics and return the fitted model.
        """
        data = pandas.read_csv(path)
        # Make categorical id columns numeric-looking by removing prefixes.
        data['site'] = data['site'].str.replace(
            "media_", ""
        )  #http://stackoverflow.com/questions/24037507/converting-string-objects-to-int-float-using-pandas
        data["user"] = data["user"].str.replace("user_", "")
        # Columns not used by the factorization.
        del data["click"]
        del data["advertiser"]
        del data["os"]
        del data["floor_price"]
        # BUG FIX: DataFrame.as_matrix() was removed from pandas (>=1.0);
        # .values is the equivalent accessor.
        vec = np.matrix(data.values)
        nmf = nimfa.Nmf(vec, seed='random_vcol', rank=20, max_iter=50)
        nmf_fit = nmf()

        print('Rss: %5.4f' % nmf_fit.fit.rss())
        print('Evar: %5.4f' % nmf_fit.fit.evar())
        print('K-L divergence: %5.4f' % nmf_fit.distance(metric='kl'))
        print('Sparseness, W: %5.4f, H: %5.4f' % nmf_fit.fit.sparseness())
        return nmf_fit
示例#30
0
def generateSpectraPlot(spectra_data):
    """
    Bar-plot, per m/z bin, the magnitude of the corresponding NMF basis
    row, normalized so the largest intensity reads as 100%.
    """
    # Each column header encodes the bin's lower bound; parse it for the
    # x-axis positions.
    bin_lower_bounds = [float(re.findall(r"[-+]?\d*\.\d+|\d+", column)[0])
                        for column in spectra_data.columns]

    ax = graphSetup("MassSpectra NMF Basis Vector Plot", "Bin Lower Bounds [m/z]", r"$Intensity\,[\%]$", [np.min(bin_lower_bounds), np.max(bin_lower_bounds)], [0,100])

    # Transpose so rows correspond to bins, matching the basis layout.
    data = np.transpose(spectra_data.values)
    model = nimfa.Nmf(data)
    basis = model().basis()

    intensities = []
    for row in basis:
        magnitude = np.linalg.norm(row)
        print(magnitude)
        # Magnitude of this bin's basis row, collected for plotting.
        intensities.append(magnitude)

    # Scale so the strongest bin reads as 100%.
    intensities = intensities / np.max(intensities) * 100
    ax.bar(bin_lower_bounds, intensities)

    savePlot()