Example #1
def run_snmf(V):
    """
    Run sparse nonnegative matrix factorization.
    
    :param V: Target matrix to estimate.
    :type V: :class:`numpy.matrix`
    """
    # SNMF/R
    rank = 10
    snmf = nimfa.Snmf(V,
                      seed="random_c",
                      rank=rank,
                      max_iter=12,
                      version='r',
                      eta=1.,
                      beta=1e-4,
                      i_conv=10,
                      w_min_change=0)
    fit = snmf()
    print_info(fit)
    # SNMF/L
    snmf = nimfa.Snmf(V,
                      seed="random_vcol",
                      rank=rank,
                      max_iter=12,
                      version='l',
                      eta=1.,
                      beta=1e-4,
                      i_conv=10,
                      w_min_change=0)
    fit = snmf()
    print_info(fit)
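Note: print_info is not defined in this snippet. A minimal sketch, assuming it only reports the fit statistics that the other examples read off the fit object:

def print_info(fit):
    """Hypothetical helper: report basic statistics of a nimfa fit object."""
    print("Iterations: %d" % fit.fit.n_iter)
    print("Euclidean distance: %5.3f" % fit.distance(metric='euclidean'))
    sparse_w, sparse_h = fit.fit.sparseness()
    print("Sparseness basis: %5.3f, mixture: %5.3f" % (sparse_w, sparse_h))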
Example #2
def one_run(iterador, df, rank, type_nmf):
    V = np.transpose(np.array(df))
    D = {}
    for i in iterador:
        d = {}
        if type_nmf == "nmf":
            nmf = nimfa.Nmf(V, rank=rank, seed="random_vcol", max_iter=1000000, update='divergence', objective='conn', conn_change=40)
        elif type_nmf == "snmf":
            nmf = nimfa.Snmf(V, rank=rank, seed="random_vcol", max_iter=1000000, conn_change=40, version='l')
        elif type_nmf == "nsnmf":
            nmf = nimfa.Nsnmf(V, rank=rank, seed="random_vcol", max_iter=1000000, objective='conn', conn_change=40)
        else:
            raise ValueError("unknown factorization type: %s" % type_nmf)
        fit = nmf()
        S = fit.summary()
        # keep only the summary statistics of interest
        keys = ('connectivity', 'euclidean', 'evar', 'kl', 'rss', 'sparseness')
        d['summary'] = {key: S[key] for key in keys}
        d['n_iter'] = fit.n_iter
        d['distance'] = fit.distance  # note: stores the bound method, not a value
        W = pd.DataFrame(fit.basis())
        E = extract_norm(W)
        d['basis'] = E['P']
        d['coef'] = pd.DataFrame(E['R'] * fit.coef())
        D[i] = d
    return D
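Note: extract_norm is external to this snippet. A plausible sketch, assuming it rescales each basis column to unit sum and returns the normalized basis P together with a diagonal scale matrix R (a guess at its contract, not the original helper):

import numpy as np
import pandas as pd

def extract_norm(W):
    # Hypothetical helper: normalize basis columns to sum to one (P) and
    # keep the column sums as a diagonal matrix (R), so that P * R * coef
    # reproduces the original W * coef.
    col_sums = np.asarray(W.sum(axis=0)).ravel()
    P = W / col_sums
    R = np.matrix(np.diag(col_sums))
    return {'P': P, 'R': R}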
Example #3
def factorize(data):
    """
    Perform factorization on S. cerevisiae FunCat annotated sequence data set (D1 FC seq).
    
    Return the factorized data, i.e. the matrix factors (basis and mixture matrix) produced by the factorization.
    
    :param data: Transformed data set containing attributes' values, class information and possibly additional meta information.  
    :type data: `tuple`
    """
    V = data['attr']
    snmf = nimfa.Snmf(V,
                      seed="random_vcol",
                      rank=40,
                      max_iter=5,
                      version="l",
                      eta=1.,
                      beta=1e-4,
                      i_conv=10,
                      w_min_change=0)
    print("Algorithm: %s\nInitialization: %s\nRank: %d" %
          (snmf, snmf.seed, snmf.rank))
    fit = snmf()
    sparse_w, sparse_h = fit.fit.sparseness()
    print("""Stats:
            - iterations: %d
            - KL Divergence: %5.3f
            - Euclidean distance: %5.3f
            - Sparseness basis: %5.3f, mixture: %5.3f""" %
          (fit.fit.n_iter, fit.distance(), fit.distance(metric='euclidean'),
           sparse_w, sparse_h))
    data['W'] = fit.basis()
    data['H'] = fit.coef()
    return data
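A hypothetical call, with a random matrix standing in for the transformed D1 FC seq attribute data:

import numpy as np

data = {'attr': np.random.rand(200, 80)}  # synthetic stand-in for the real data set
data = factorize(data)
print(data['W'].shape, data['H'].shape)  # expected: (200, 40) and (40, 80)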
Example #4
def param_sweep_rss(V, k_range, beta_range):
    k_length = k_range.size
    beta_length = beta_range.size
    nEdges = V.shape[0]  #num rows
    nBlocks = V.shape[1]  #num cols

    parameter_space = np.zeros((k_length, beta_length))
    eta = np.max(V)**2

    for ii in np.arange(k_length):
        for jj in np.arange(beta_length):
            print(jj)
            k = k_range[ii]
            beta = beta_range[jj]

            # Legacy nimfa.mf / nimfa.mf_run API, replaced by the nimfa.Snmf call below.
            snmf = nimfa.Snmf(V,
                              seed="nndsvd",
                              rank=k,
                              max_iter=30,
                              version='r',
                              eta=eta,
                              beta=beta,
                              i_conv=10,
                              w_min_change=0)
            fctr_res = snmf()
            parameter_space[ii, jj] = fctr_res.fit.rss()
        print(ii)

    return {'parameter_space': parameter_space}
Example #5
def factorize(V):
    """
    Perform SNMF/R factorization on the sparse MovieLens data matrix. 
    
    Return basis and mixture matrices of the fitted factorization model. 
    
    :param V: The MovieLens data matrix. 
    :type V: `numpy.matrix`
    """
    snmf = nimfa.Snmf(V,
                      seed="random_vcol",
                      rank=30,
                      max_iter=30,
                      version='r',
                      eta=1.,
                      beta=1e-4,
                      i_conv=10,
                      w_min_change=0)
    print("Algorithm: %s\nInitialization: %s\nRank: %d" %
          (snmf, snmf.seed, snmf.rank))
    fit = snmf()
    sparse_w, sparse_h = fit.fit.sparseness()
    print(
        """Stats:
            - iterations: %d
            - Euclidean distance: %5.3f
            - Sparseness basis: %5.3f, mixture: %5.3f""" %
        (fit.fit.n_iter, fit.distance(metric='euclidean'), sparse_w, sparse_h))
    return fit.basis(), fit.coef()
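A hypothetical call on a synthetic ratings matrix; nimfa also accepts SciPy sparse matrices, which is how a real MovieLens matrix would typically be stored:

import scipy.sparse as sp

V = sp.rand(100, 60, density=0.1, format='csr') * 5  # synthetic users x movies ratings
W, H = factorize(V)
print(W.shape, H.shape)  # expected: (100, 30) and (30, 60)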
Example #6
    def getWH(self, x_input, rank=10):
        snmf = nimfa.Snmf(x_input, seed="random_c", rank=rank, max_iter=12, version='r', eta=1.,
                          beta=1e-4, i_conv=10, w_min_change=0)
        snmf_fit = snmf()
        W = snmf_fit.basis()
        H = snmf_fit.coef()
        return W, H
Example #7
def cons_sig(df):
    kk = 1
    np.random.seed(0)
    w_r = np.random.random((df.shape[0], kk))
    h_r = np.random.random((kk, df.shape[1]))
    betaa = 0.0
    snmf0 = nimfa.Snmf(np.matrix(df), rank=kk, beta=betaa, max_iter=1000,
                       W=w_r, H=h_r, version='r', min_residuals=1e-7)
    snmf0_fit = snmf0()
    W00 = snmf0_fit.fit.W
    H0 = snmf0_fit.fit.H
    return W00
Example #8
def separate_stains_xu_snmf(im_sda, w_init=None, beta=0.2):
    """Compute the stain matrix for color deconvolution with SNMF.

    ... (sparse non-negative matrix factorization).

    Parameters
    ----------
    im_sda : array_like
        Image (MxNx3) or matrix (3xN) in SDA space for which to compute the
        stain matrix.
    w_init : array_like, default is None
        Initial value for the stain matrix. If not provided, a default
        initialization is used.
    beta : float
        Regularization factor for the sparsity of the deconvolved pixels.

    Returns
    -------
    w : array_like
        A 3x3 matrix of stain column vectors

    Note
    ----
    All input pixels are used in the factorization.

    See Also
    --------
    histomicstk.preprocessing.color_deconvolution.color_deconvolution
    histomicstk.preprocessing.color_deconvolution.separate_stains_macenko_pca

    References
    ----------
    .. [#] Van Eycke, Y. R., Allard, J., Salmon, I., Debeir, O., &
           Decaestecker, C. (2017).  Image processing in digital pathology: an
           opportunity to solve inter-batch variability of immunohistochemical
           staining.  Scientific Reports, 7.
    .. [#] Xu, J., Xiang, L., Wang, G., Ganesan, S., Feldman, M., Shih, N. N.,
           ... & Madabhushi, A. (2015). Sparse Non-negative Matrix Factorization
           (SNMF) based color unmixing for breast histopathological image
           analysis.  Computerized Medical Imaging and Graphics, 46, 20-29.

    """
    # Image matrix
    m = utils.convert_image_to_matrix(im_sda)
    m = utils.exclude_nonfinite(m)
    factorization = \
        nimfa.Snmf(m, rank=m.shape[0] if w_init is None else w_init.shape[1],
                   W=w_init,
                   H=None if w_init is None else np_linalg.pinv(w_init).dot(m),
                   beta=beta)
    factorization.factorize()
    return htk_linalg.normalize(numpy.array(factorization.W))
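A hedged usage sketch, assuming the surrounding histomicstk module context (utils, htk_linalg) is available and using histomicstk's color_conversion.rgb_to_sda helper to build the SDA-space input:

import numpy as np
from histomicstk.preprocessing import color_conversion

im_rgb = np.random.randint(1, 255, (64, 64, 3)).astype(np.uint8)  # synthetic tile
im_sda = color_conversion.rgb_to_sda(im_rgb, 255)  # 255 assumed as background intensity
w = separate_stains_xu_snmf(im_sda, beta=0.2)
print(w.shape)  # expected: a 3x3 stain matrix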
Example #9
def SNMF(X, k=20):
    snmf = nimfa.Snmf(X, rank=k, max_iter=10)
    snmf_fit = snmf()
    # return the reconstructed target matrix W * H
    target = snmf_fit.fitted()
    return target
Example #10
def run_nmf(nSubj, nNodes, k, beta, input_filename, output_filename):

    triuIdx = np.triu_indices(nNodes, k=1)

    filename = input_filename
    save_file = output_filename
    print(filename)
    h5f = h5py.File(filename, 'r')
    c = h5f['config_matrix'][:]  # load the full matrix into memory
    print(c.shape)

    eta = np.max(c)**2

    # Legacy nimfa.mf / nimfa.mf_run API, replaced by the nimfa.Snmf call below.
    snmf = nimfa.Snmf(c, seed="nndsvd", rank=k, max_iter=30, version='r',
                      eta=eta, beta=beta, i_conv=10, w_min_change=0)
    fctr_res = snmf()

    #output matrices
    basis = np.array(fctr_res.basis())
    expr = np.array(fctr_res.coef()).T

    #reshape subnetworks
    coactMatr = np.zeros((k, nNodes, nNodes))
    for comp in np.arange(k):
        basisNet = np.zeros((nNodes, nNodes))
        basisNet[triuIdx[0], triuIdx[1]] = basis[:, comp]
        basisNet += basisNet.T
        coactMatr[comp, ...] = basisNet[...]

    #save output
    f = h5py.File(save_file, 'w')
    f.create_dataset('subnetworks', data=coactMatr)
    f.create_dataset('timeseries', data=expr)

    h5f.close()
    f.close()
Example #11
def factorize(V, rank_, algorithm='snmf'):
    if algorithm == 'snmf':
        snmf = nimfa.Snmf(V,
                          seed="random_vcol",
                          rank=rank_,
                          max_iter=30,
                          version='r',
                          eta=1.,
                          beta=1e-4,
                          i_conv=10,
                          w_min_change=0)
        print("Algorithm: %s\nInitialization: %s\nRank: %d" %
              (snmf, snmf.seed, snmf.rank))
        fit = snmf()
        sparse_w, sparse_h = fit.fit.sparseness()
        print("""Stats:
            - iterations: %d
            - Euclidean distance: %5.3f
            - Sparseness basis: %5.3f, mixture: %5.3f""" %
              (fit.fit.n_iter, fit.distance(metric='euclidean'), sparse_w,
               sparse_h))
        return fit.basis(), fit.coef()
    return 1, 1
Example #12
    def run(self, output_file):
        print "Running non-negative MF....", strftime(
            "%Y-%m-%d %H:%M:%S", gmtime())
        if self.method == 'nmf':
            modelnmf = nimfa.Nmf(self.mat, rank=self.rank, max_iter=self.iter)
        elif self.method == "lfnmf":
            modelnmf = nimfa.Lfnmf(self.mat, rank=self.rank, max_iter=self.iter)
        elif self.method == "nsnmf":
            modelnmf = nimfa.Nsnmf(self.mat, rank=self.rank, max_iter=self.iter)
        elif self.method == "pmf":
            modelnmf = nimfa.Pmf(self.mat, rank=self.rank, max_iter=self.iter)
        elif self.method == "psmf":
            modelnmf = nimfa.Psmf(self.mat, rank=self.rank, max_iter=self.iter)
        elif self.method == "snmf":
            modelnmf = nimfa.Snmf(self.mat, rank=self.rank, max_iter=self.iter)
        elif self.method == "sepnmf":
            modelnmf = nimfa.Sepnmf(self.mat, rank=self.rank, max_iter=self.iter)
        else:
            print "No model is being recognized, stopped."
            sys.exit(1)

        model = modelnmf()
        self.result = np.array(model.fitted())
        print "Done MF!", strftime("%Y-%m-%d %H:%M:%S", gmtime())


        print "Write results to file.", strftime("%Y-%m-%d %H:%M:%S", gmtime())
        with open(output_file, "r+") as file:
            query = file.readlines()
            file.seek(0)
            file.truncate()

            for line in query:
                list = line.split()
                newline = "%s %s %f\n" % (
                    list[0], list[1],
                    self.result[int(list[0])][int(list[1])]
                )
                file.write(newline)
Example #13
    def predict(self, X):
        """
        :param X: with shape (n_pixel, n_band)
        :return:
        """
        # # Note that X has to reshape to (n_fea., n_sample)
        # XX = X.transpose()  # (n_band, n_pixel)
        # snmf = nimfa.Snmf(X, seed="random_c", rank=self.n_band)  # remain para. default
        snmf = nimfa.Snmf(X,
                          rank=self.n_band,
                          max_iter=20,
                          version='r',
                          eta=1.,
                          beta=1e-4,
                          i_conv=10,
                          w_min_change=0)
        snmf_fit = snmf()
        W = snmf_fit.basis()  # shape: (n_pixel, k)
        H = snmf_fit.coef()  # shape: (k, n_band)

        #  get clustering res.
        H = np.asarray(H)
        indx_sort = np.argsort(H, axis=0)  # ascend order
        cluster_res = indx_sort[-1].reshape(-1)

        #  select band
        selected_band = []
        for c in np.unique(cluster_res):
            idx = np.nonzero(cluster_res == c)
            center = np.mean(X[:, idx[0]], axis=1).reshape((-1, 1))
            distance = np.linalg.norm(X[:, idx[0]] - center, axis=0)
            band_ = X[:, idx[0]][:, distance.argmin()]
            selected_band.append(band_)
        while len(selected_band) < self.n_band:
            selected_band.append(np.zeros(X.shape[0]))
        bands = np.asarray(selected_band).transpose()
        return bands
Example #14
    args = argument_parser.parse_args()

    node_feature = args.node_feature
    out_prefix = args.output_prefix
    out_dir = args.output_dir

    refex_features = np.loadtxt(node_feature, delimiter=',')
    actual_fx_matrix = refex_features[:, 1:]

    n, f = actual_fx_matrix.shape
    print 'Number of Features: ', f
    print 'Number of Nodes: ', n

    sparsity_threshold = 2.0
    for i in xrange(1, 6):
        for rank in xrange(20, 29 + 1):
            snmf = nimfa.Snmf(actual_fx_matrix,
                              seed="random_vcol",
                              version='r',
                              rank=rank,
                              beta=sparsity_threshold)
            snmf_fit = snmf()
            G = np.asarray(snmf_fit.basis())
            F = np.asarray(snmf_fit.coef())

            w_out = '%s-%s-%s-nodeRoles.txt' % (rank, i, out_prefix)
            h_out = '%s-%s-%s-roleFeatures.txt' % (rank, i, out_prefix)

            np.savetxt(out_dir + w_out, X=G)
            np.savetxt(out_dir + h_out, X=F)
Example #15
# sparse_right_nmf, current_milli_time and LA (numpy.linalg) come from the
# elided context of this example.

V = np.random.rand(1000, 40)

start = current_milli_time()
w, h = sparse_right_nmf(V, rank=15, max_iters=10, beta=2.0)
end = current_milli_time()
# w, h =nmf(V, k=4, max_iters=30)
a = np.abs(V - np.dot(w, h))
c = LA.norm(a, 'fro')
print(c, end - start)

start = current_milli_time()
snmf = nimfa.Snmf(V,
                  seed="random_vcol",
                  version='r',
                  rank=15,
                  beta=2.0,
                  max_iter=10)
snmf_fit = snmf()

G = np.asarray(snmf_fit.basis())
F = np.asarray(snmf_fit.coef())
end = current_milli_time()

a = np.abs(V - np.dot(G, F))
c = LA.norm(a, 'fro')
print(c, end - start)
Example #16
def NMFAnalysis(filename,Rank,turn=0,strategy="conservative"):
    
    X=[]
    header=[]
    head=0
    exportnam=export.findParentDir(filename)+'/NMF/round'+str(turn)+'NMFsnmf_versionr.txt'#+str(Rank)+'.txt'
    export_res=export.ExportFile(exportnam)
    exportnam_bin=export.findParentDir(filename)+'/NMF/round'+str(turn)+'NMFsnmf_binary.txt'#+str(Rank)+'.txt'
    export_res1=export.ExportFile(exportnam_bin)
    exportnam_bint=export.findParentDir(filename)+'/NMF/round'+str(turn)+'NMFsnmf_binary_t_.txt'#+str(Rank)+'.txt'
    export_res5=export.ExportFile(exportnam_bint)
    exportnam2=export.findParentDir(filename)+'/SubtypeAnalyses/round'+str(turn)+'Metadata.txt'#+str(Rank)+'.txt'
    export_res2=export.ExportFile(exportnam2)
    exportnam3=export.findParentDir(filename)+'/SubtypeAnalyses/round'+str(turn)+'Annotation.txt'#+str(Rank)+'.txt'
    export_res3=export.ExportFile(exportnam3)
    if 'Clustering' in filename:
        count=1
        start=2
    else:
        count=0
        start=1
        
    print filename
    for line in open(filename,'rU').xreadlines():
        line=line.rstrip('\r\n')
        q= string.split(line,'\t')
        if head >count:
            val=[]
            val2=[]
            me=0.0
            
            for i in range(start,len(q)):
                try:
                    val2.append(float(q[i]))
                except Exception:
                    continue
            me=np.median(val2)
            for i in range(start,len(q)):
                try:
                    val.append(float(q[i]))
                except Exception:
                    val.append(float(me))
            X.append(val)
          
        else:
            export_res1.write(line)
            export_res.write(line)
            export_res1.write("\n")
            export_res.write("\n")
            header=q
            head+=1
            continue

    group=defaultdict(list)
        
    sh=[]
    X=np.array(X)
    mat=[]
    mat=zip(*X)
    mat=np.array(mat)
    nmf = nimfa.Snmf(mat,seed="nndsvd", rank=int(Rank), max_iter=20,n_run=10,track_factor=True)
    nmf_fit = nmf()
    W = nmf_fit.basis()
    W=np.array(W)
    H=nmf_fit.coef()
    H=np.array(H)

    sh=W.shape
    export_res3.write("uid\tUID\tUID\n")
    if int(Rank)==2:
        par=1
    else:
        par=2

    W=zip(*W)
    W=np.array(W)
    sh=W.shape
    Z=[]
    for i in range(sh[0]):
        new_val=[]
        val=W[i,:]
        num = sum(v > 0.10 for v in val)
        if num >40 or num <3:
            compstd=True
        else:
            compstd=False
        me=np.mean(val)
        st=np.std(val)
        #print 'V'+str(i)
        export_res.write('V'+str(i))
        export_res1.write('V'+str(i))
        for j in range(sh[1]):
            if compstd:   
                if float(W[i][j])>=float(me+(par*st)):
                
                    export_res1.write("\t"+str(1))
                    new_val.append(1)
                else:
                    export_res1.write("\t"+str(0))
                    new_val.append(0)
            else:
                if float(W[i][j])>0.1:
                
                    export_res1.write("\t"+str(1))
                    new_val.append(1)
                else:
                    export_res1.write("\t"+str(0))
                    new_val.append(0)
            export_res.write("\t"+str(W[i][j]))
        Z.append(new_val)
        export_res.write("\n")
        export_res1.write("\n")
        
    Z=np.array(Z)
    sh=Z.shape
    Z_new=[]
    val1=[]
    Z1=[]
    dellst=[]
    export_res2.write("uid")
    export_res5.write("uid")
    for i in range(sh[0]):
        indices=[]
        val1=Z[i,:]
        sum1=sum(val1)
        flag=False
        indices=[index for index, value in enumerate(val1) if value == 1]
        for j in range(sh[0]):
            val2=[]
            
            if i!=j:
                val2=Z[j,:]
                
                sum2=sum([val2[x] for x in indices])
                summ2=sum(val2)
                try:
                    if float(sum2)/float(sum1)>0.5:
                        if summ2>sum1:
                            flag=True
                            #print str(i)
                except Exception:
                    continue
        if flag==False:

            Z1.append(val1)
            export_res2.write("\t"+'V'+str(i))
            export_res5.write("\t"+'V'+str(i))
            export_res3.write('V'+str(i)+"\t"+"Covariate"+"\t"+str(1)+"\n")
    
    export_res2.write("\n")
    export_res5.write("\n")
    Z1=np.array(Z1)
    Z=Z1
    Z=zip(*Z)
    Z=np.array(Z)
    sh=Z.shape
    print "stringency = ",[strategy]
    for i in range(sh[0]):
        val1=Z[i,:]
        #print sum(val1)
        #if sum(val)>2:
        if sum(val1)>2:
            val=[0 if x==1 else x for x in val1]
        else:
            val=val1
        me=np.mean(val)
        st=np.std(val)
        export_res2.write(header[i+1])
        export_res5.write(header[i+1])
        
        for j in range(sh[1]):
            if strategy=="conservative":
                #print header[i+1]
                export_res2.write("\t"+str(val1[j]))
                export_res5.write("\t"+str(val1[j]))
            else:
               #print header[i+1] 
               export_res2.write("\t"+str(val[j]))
               export_res5.write("\t"+str(val[j])) 
        export_res2.write("\n")
        export_res5.write("\n")
        Z_new.append(val)
        
    Z_new=zip(*Z_new)
    Z_new=np.array(Z_new)
    sh=Z_new.shape
    export_res5.close()
    Orderedheatmap.Classify(exportnam_bint)
    return exportnam,exportnam_bin,exportnam2,exportnam3
Example #17
NMF_power.fit(spec_matrix)
NM_power_weights = NMF_power.components_
NM_power_basis = NMF_power.transform(spec_matrix)
NM_weights_desc = pd.DataFrame(NM_power_weights.transpose()).describe()

print NM_weights_desc

print datetime.now() - startTime  # prints execution time of the cell: 4 mins for 1 hour spec

#%% TRY nimfa methods.factorization.snmf #### sparse nonnegative matrix factorisation
snmf = nimfa.Snmf(spec_matrix,
                  seed="random_vcol",
                  rank=40,
                  max_iter=20,
                  version='r',
                  eta=1.,
                  beta=1e-4,
                  i_conv=10,
                  w_min_change=0)
snmf_fit = snmf()

#%% Looks better - sparse at least
SNMF_basis = snmf_fit.basis()
SNMF_weights = snmf_fit.coef()
SNMF_weights_desc = pd.DataFrame(SNMF_weights.transpose()).describe()
print SNMF_weights_desc

# with rank = 10 and max_iter = 12, bad results - most components just 0

#%%
Example #18
import numpy as np

import nimfa

V = np.random.rand(40, 100)
snmf = nimfa.Snmf(V, seed="random_c", rank=10, max_iter=12, version='r', eta=1.,
                  beta=1e-4, i_conv=10, w_min_change=0)
snmf_fit = snmf()
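A short follow-up using the same fit-object accessors the other examples rely on:

W = snmf_fit.basis()   # 40 x 10 basis matrix
H = snmf_fit.coef()    # 10 x 100 mixture matrix
print("Euclidean distance: %5.3f" % snmf_fit.distance(metric='euclidean'))
print("Explained variance: %5.3f" % snmf_fit.fit.evar())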
Example #19
def NMFAnalysis(expressionInputFile,NMFinputDir,Rank,platform,iteration=0,strategy="conservative"):

    root_dir = export.findParentDir(NMFinputDir)[:-1]
    if 'ExpressionInput' in root_dir:
        root_dir = export.findParentDir(root_dir)
    if 'NMF-SVM' in root_dir:
        root_dir = export.findParentDir(root_dir)
        
    export.findFilename(NMFinputDir)
        
    X=[]
    header=[]
    head=0
    exportnam=root_dir+'/NMF-SVM/NMF/round'+str(iteration)+'NMFsnmf_versionr'+str(Rank)+'.txt'
    export_res=export.ExportFile(exportnam)
    exportnam_bin=root_dir+'/NMF-SVM/NMF/round'+str(iteration)+'NMFsnmf_binary'+str(Rank)+'.txt'
    export_res1=export.ExportFile(exportnam_bin)
    exportnam_bint=root_dir+'/NMF-SVM/NMF/round'+str(iteration)+'NMFsnmf_binary_t_'+str(Rank)+'.txt'
    export_res5=export.ExportFile(exportnam_bint)
    MF_input = root_dir+'/NMF-SVM/ExpressionInput/exp.NMF-MarkerFinder.txt'
    export.customFileCopy(expressionInputFile,root_dir+'/NMF-SVM/ExpressionInput/exp.NMF-MarkerFinder.txt')
    export_res4=open(string.replace(MF_input,'exp.','groups.'),"w")
    export_res7=open(string.replace(MF_input,'exp.','comps.'),"w")
    exportnam2=root_dir+'/NMF-SVM/SubtypeAnalyses/round'+str(iteration)+'Metadata'+str(Rank)+'.txt'
    export_res2=export.ExportFile(exportnam2)
    exportnam3=root_dir+'/NMF-SVM/SubtypeAnalyses/round'+str(iteration)+'Annotation'+str(Rank)+'.txt'
    export_res3=export.ExportFile(exportnam3)
    #if 'Clustering' in NMFinputDir:
     #   count=1
      #  start=2
    #else:
    count=0
    start=1
    #print Rank
    for line in open(NMFinputDir,'rU').xreadlines():
        line=line.rstrip('\r\n')
        q= string.split(line,'\t')
        if head >count:
            val=[]
            val2=[]
            me=0.0
            
            for i in range(start,len(q)):
                try:
                    val2.append(float(q[i]))
                except Exception:
                    continue
            me=np.median(val2)
            for i in range(start,len(q)):
                try:
                    val.append(float(q[i]))
                except Exception:
                    val.append(float(me))
            #if q[1]==prev:
            X.append(val)
          
        else:
            export_res1.write(line)
            export_res.write(line)
            export_res1.write("\n")
            #export_res4.write(line)
            #export_res4.write("\n")
            export_res.write("\n")
            header=q
            head+=1
            continue   
    group=defaultdict(list)
        
    sh=[]
    X=np.array(X)
    #print X.shape
    mat=[]
    #mat=X
    mat=zip(*X)
    mat=np.array(mat)
    #print mat.shape
    #model = NMF(n_components=15, init='random', random_state=0)
    #W = model.fit_transform(mat)
    nmf = nimfa.Snmf(mat,seed="nndsvd", rank=int(Rank), max_iter=20,n_run=1,track_factor=False,theta=0.95)
    nmf_fit = nmf()
    W = nmf_fit.basis()
    W=np.array(W)
    #np.savetxt("basismatrix2.txt",W,delimiter="\t")
    H=nmf_fit.coef()
    H=np.array(H)
   # np.savetxt("coefficientmatrix2.txt",H,delimiter="\t")
    #print W.shape
    sh=W.shape
    export_res3.write("uid\tUID\tUID\n")
    if int(Rank)==2:
        par=1
    else:
        par=2
    #for i in range(sh[1]):
    #    val=W[:,i]
    #    me=np.mean(val)
    #    st=np.std(val)
    #    export_res2.write(header[i+1])
    #    for j in range(sh[0]):
    #        if float(W[i][j])>=float(me+(par*st)):
    #          
    #            export_res2.write("\t"+str(1))
    #        else:
    #            export_res2.write("\t"+str(0))
    #       
    #    export_res2.write("\n")
    if platform != 'PSI':
        sh=W.shape
        Z=[]
        export_res5.write("uid")
        export_res2.write("uid")
        for i in range(sh[1]):
            
            export_res5.write("\t"+'V'+str(i))
            export_res2.write("\t"+'V'+str(i))
            export_res3.write('V'+str(i)+"\t"+"Covariate"+"\t"+str(1)+"\n")
            
        export_res5.write("\n")
        export_res2.write("\n")
        export_res3.write("\n")
        for i in range(sh[0]):
            new_val=[]
            val=W[i,:]
            export_res2.write(header[i+1])
            export_res5.write(header[i+1])
            export_res4.write(header[i+1])
            flag=True
            for j in range(sh[1]):
                if W[i][j]==max(val) and flag:
                    export_res5.write("\t"+str(1))
                    export_res2.write("\t"+str(1))
                    new_val.append(1)
                    export_res4.write("\t"+str(j+1)+"\t"+'V'+str(j))
                    flag=False
                else:
                    export_res5.write("\t"+str(0))
                    export_res2.write("\t"+str(0))
                    new_val.append(0)
                
            Z.append(new_val)
            export_res5.write("\n")
            export_res2.write("\n")
            export_res4.write("\n")
        W=zip(*W)
        W=np.array(W)
        sh=W.shape
        Z=zip(*Z)
        Z=np.array(Z)
        for i in range(sh[0]):
            export_res.write('V'+str(i))
            export_res1.write('V'+str(i))
            for j in range(sh[1]):
                export_res.write("\t"+str(W[i][j]))
                export_res1.write("\t"+str(Z[i][j]))
            export_res.write("\n")
            export_res1.write("\n")
            
        export_res.close()
        export_res1.close()
        export_res2.close()
        export_res5.close()
        Orderedheatmap.Classify(exportnam_bint)    
        
        return exportnam,exportnam_bin,exportnam2,exportnam3
    
    else:
        W=zip(*W)
        W=np.array(W)
        sh=W.shape
        Z=[]
        for i in range(sh[0]):
            new_val=[]
            val=W[i,:]
            num = sum(v > 0.10 for v in val)
            if num >40 or num <3:
                compstd=True
            else:
                compstd=False
            me=np.mean(val)
            st=np.std(val)
            #print 'V'+str(i)
            export_res.write('V'+str(i))
            export_res1.write('V'+str(i))
           
            for j in range(sh[1]):
                
                if compstd:   
                    if float(W[i][j])>=float(me+(par*st)):
                    
                        export_res1.write("\t"+str(1))
                        new_val.append(1)
                    else:
                        export_res1.write("\t"+str(0))
                        new_val.append(0)
                else:
                    if float(W[i][j])>0.1:
                    
                        export_res1.write("\t"+str(1))
                        new_val.append(1)
                    else:
                        export_res1.write("\t"+str(0))
                        new_val.append(0)
                export_res.write("\t"+str(W[i][j]))
                
            Z.append(new_val)
            export_res.write("\n")
            export_res1.write("\n")
       # Z=zip(*Z)
        Z=np.array(Z)
        sh=Z.shape
        Z_new=[]
        val1=[]
        Z1=[]
        dellst=[]
        export_res2.write("uid")
        export_res5.write("uid")
        for i in range(sh[0]):
            indices=[]
            val1=Z[i,:]
            sum1=sum(val1)
            flag=False
            indices=[index for index, value in enumerate(val1) if value == 1]
            for j in range(sh[0]):
                val2=[]
                
                if i!=j:
                    val2=Z[j,:]
                    
                    sum2=sum([val2[x] for x in indices])
                    summ2=sum(val2)
                    try:
                        if float(sum2)/float(sum1)>0.5:
                            if summ2>sum1:
                                flag=True
                                #print str(i)
                    except Exception:
                        continue
            if flag==False:
    
                Z1.append(val1)
                export_res2.write("\t"+'V'+str(i))
                export_res5.write("\t"+'V'+str(i))
                export_res3.write('V'+str(i)+"\t"+"Covariate"+"\t"+str(1)+"\n")
        
        export_res2.write("\n")
        export_res5.write("\n")
        Z1=np.array(Z1)
        Z=Z1
        Z=zip(*Z)
        Z=np.array(Z)
        sh=Z.shape
            
        for i in range(sh[0]):
            val1=Z[i,:]
            #print sum(val1)
            #if sum(val)>2: 
            if sum(val1)>2:
                val=[0 if x==1 else x for x in val1]
            else:
                val=val1
            me=np.mean(val)
            st=np.std(val)
            export_res2.write(header[i+1])
            export_res5.write(header[i+1])
            for j in range(sh[1]):
                if strategy=="conservative":
                    export_res2.write("\t"+str(val1[j]))
                    export_res5.write("\t"+str(val1[j]))
                else:
                   export_res2.write("\t"+str(val[j]))
                   export_res5.write("\t"+str(val[j])) 
            export_res2.write("\n")
            export_res5.write("\n")
            Z_new.append(val)
        Z_new=zip(*Z_new)
        Z_new=np.array(Z_new)
        
        sh=Z_new.shape

        export_res5.close()
        Orderedheatmap.Classify(exportnam_bint)    
        if strategy=="conservative":
            return exportnam,exportnam_bin,exportnam2,exportnam3
        else:
            return exportnam,exportnam_bin,exportnam2,exportnam3
Example #20
    def train(self):
        # Run MF
        print "Running non-negative MF....", strftime("%Y-%m-%d %H:%M:%S",
                                                      gmtime())
        source_result = None
        if self.method == "nmf":
            modelnmf = nimfa.Nmf(self.r1, rank=self.rank, max_iter=self.iter)
        elif self.method == "lfnmf":
            modelnmf = nimfa.Lfnmf(self.r1, rank=self.rank, max_iter=self.iter)
        elif self.method == "nsnmf":
            modelnmf = nimfa.Nsnmf(self.r1, rank=self.rank, max_iter=self.iter)
        elif self.method == "pmf":
            modelnmf = nimfa.Pmf(self.r1, rank=self.rank, max_iter=self.iter)
        elif self.method == "psmf":
            modelnmf = nimfa.Psmf(self.r1, rank=self.rank, max_iter=self.iter)
        elif self.method == "snmf":
            modelnmf = nimfa.Snmf(self.r1, rank=self.rank, max_iter=self.iter)
        elif self.method == "sepnmf":
            modelnmf = nimfa.Sepnmf(self.r1,
                                    rank=self.rank,
                                    max_iter=self.iter)
        else:
            print "No model is being recognized, stopped."
            sys.exit(1)

        model = modelnmf()
        source_result = np.array(model.fitted())

        print "Done MF!", strftime("%Y-%m-%d %H:%M:%S", gmtime())

        # Turn each rating vector into a distribution,
        # calculate the similarity between distributions,
        # and then find the best match.
        print "Transfer user vector into distribution.", strftime(
            "%Y-%m-%d %H:%M:%S", gmtime())

        item_pdf1 = []
        for i in range(N_ITEM):
            count = 0
            pdf = np.zeros(11)
            for j in range(N_USER):
                t = self.r1[i][j]
                if t == 0.0:
                    t = source_result[i][j]

                # ignore the count if it is 0.
                if t < 1e-4:
                    continue

                idx = min(int(math.floor(t / 0.1)), 10)
                pdf[idx] += 1
                count += 1
            if count > 1:
                pdf = pdf / count
            # print count
            item_pdf1.append(pdf)

        item_pdf2 = []
        for i in range(N_ITEM):
            count = 0
            pdf = np.zeros(11)
            for j in range(N_USER):
                if self.r2[i][j] > 0:
                    count += 1
                    pdf[int(math.floor(self.r2[i][j] / 0.1))] += 1
            if count > 1:
                pdf = pdf / count
            item_pdf2.append(pdf)

        # Transform now for further use: matrix[user]
        # self.r1 = self.r1.T
        # self.r2 = self.r2.T

        print "Calculate cost matrix....", strftime("%Y-%m-%d %H:%M:%S",
                                                    gmtime())
        # Calculate cost matrix for items
        # matrix[item r1][item r2]

        # Uses 5 threads to run this slowest part.
        partition = 5
        matrix = [[] for i in range(partition)]

        threads = []
        ll = np.split(np.array(range(N_ITEM)), partition)
        for index in range(partition):
            thread = Thread(target=self.threadFunc,
                            args=(matrix[index], ll[index], item_pdf1,
                                  item_pdf2))
            threads.append(thread)
            thread.start()

        for thread in threads:
            thread.join()

        matrix = np.array(np.concatenate(matrix, axis=0))

        print "Matrix shape: ", matrix.shape

        print "Hungarian running maximum matching....", strftime(
            "%Y-%m-%d %H:%M:%S", gmtime())
        match1to2, match2to1 = hungarian.lap(matrix)
        print "End of matching!", strftime("%Y-%m-%d %H:%M:%S", gmtime())

        # Create item-matching version
        # trans[item in r2]
        trans = []
        for item2 in range(N_ITEM):
            trans.append(source_result[match2to1[item2]])
        trans = np.array(trans).T

        # Find most similar user pair
        print "Find most similar user pair..... Write file...", strftime(
            "%Y-%m-%d %H:%M:%S", gmtime())

        self.writeTrans(trans)

        print "Done, enter cpp mode", strftime("%Y-%m-%d %H:%M:%S", gmtime())
Example #21
def sparse_color_deconvolution(im_rgb, w_init, beta):
    """Performs adaptive color deconvolution.

    Uses sparse non-negative matrix factorization to adaptively deconvolve a
    given RGB image into intensity images representing distinct stains.
    Similar approach to ``color_deconvolution`` but operates adaptively.
    The input RGB image `im_rgb` consisting of RGB values is first transformed
    into optical density space as a row-matrix, and then is decomposed as
    :math:`V = W H` where :math:`W` is a 3xk matrix containing stain vectors
    in columns and :math:`H` is a k x m*n matrix of concentrations for each
    stain vector. The system is solved to encourage sparsity of the columns
    of :math"`H` i.e. most pixels should not contain significant contributions
    from more than one stain. Can use a hot-start initialization from a color
    deconvolution matrix.

    Parameters
    ----------
    im_rgb : array_like
        An RGB image of type unsigned char, or a 3xN matrix of RGB pixel
        values.
    w_init : array_like
        A 3xK matrix containing the color vectors in columns. Should not be
        complemented with ComplementStainMatrix for sparse decomposition to
        work correctly.
    beta : double
        Regularization factor for sparsity of :math:`H` - recommended 0.5.

    Returns
    -------
    stains : array_like
        An rgb image with deconvolved stain intensities in each channel,
        values ranging from [0, 255], suitable for display.
    w : array_like
        The final 3 x k stain matrix produced by NMF decomposition.

    Notes
    -----
    Return values are returned as a namedtuple

    See Also
    --------
    histomicstk.preprocessing.color_deconvolution.ColorDeconvolution

    References
    ----------
    .. [1] J. Xu, L. Xiang, G. Wang, S. Ganesan, M. Feldman, N.N. Shih,
           H. Gilmore, A. Madabhushi, "Sparse Non-negative Matrix
           Factorization (SNMF) based color unmixing for breast
           histopathological image analysis," Computerized Medical Imaging
           and Graphics, vol. 46, pp. 20-29, 2015.
    """

    # determine if input is RGB or pixel-matrix format
    if len(im_rgb.shape) == 3:  # RGB image provided
        if im_rgb.shape[2] == 4:  # remove alpha channel if needed
            im_rgb = im_rgb[:, :, (0, 1, 2)]
        m = im_rgb.shape[0]
        n = im_rgb.shape[1]
        im_rgb = np.reshape(im_rgb, (m * n, 3)).transpose()
    elif len(im_rgb.shape) == 2:  # pixel matrix provided
        m = -1
        n = -1

    # transform input RGB to optical density values
    im_rgb = im_rgb.astype(dtype=np.float32)
    im_rgb[im_rgb == 0] = 1e-16
    ODfwd = color_conversion.rgb_to_od(im_rgb)

    if w_init is None:

        # set number of output stains
        K = 3

        # perform NMF without initialization
        Factorization = nimfa.Snmf(V=ODfwd,
                                   seed=None,
                                   rank=K,
                                   version='r',
                                   beta=beta)
        Factorization()

    else:

        # get number of output stains
        K = w_init.shape[1]

        # normalize stains to unit-norm
        for i in range(K):
            Norm = np.linalg.norm(w_init[:, i])
            if (Norm >= 1e-16):
                w_init[:, i] /= Norm
            else:
                raise ValueError('stain vector in w_init has zero norm')

        # estimate initial H given p
        Hinit = np.dot(np.linalg.pinv(w_init), ODfwd)
        Hinit[Hinit < 0] = 0

        # perform regularized NMF
        Factorization = nimfa.Snmf(V=ODfwd,
                                   seed=None,
                                   W=w_init,
                                   H=Hinit,
                                   rank=K,
                                   version='r',
                                   beta=beta)
        Factorization()

    # extract solutions and make columns of "w" unit-norm
    w = np.asarray(Factorization.basis())
    H = np.asarray(Factorization.coef())
    for i in range(K):
        Norm = np.linalg.norm(w[:, i])
        w[:, i] /= Norm
        H[i, :] *= Norm

    # reshape H matrix to image
    if m == -1:
        stains_float = np.transpose(H)
    else:
        stains_float = np.reshape(np.transpose(H), (m, n, K))

    # transform type
    stains = np.copy(stains_float)
    stains[stains > 255] = 255
    stains = stains.astype(np.uint8)

    # build named tuple for outputs
    Unmixed = collections.namedtuple('Unmixed', ['Stains', 'W'])
    Output = Unmixed(stains, w)

    # return solution
    return Output
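A hypothetical call, assuming the module's elided imports (np, nimfa, collections, color_conversion) and using the standard Ruifrok H&E stain vectors as the hot start (the w_init values and the synthetic tile are illustrative only):

import numpy as np

w_init = np.array([[0.650, 0.072],   # hematoxylin and eosin vectors in columns
                   [0.704, 0.990],
                   [0.286, 0.105]])
im_rgb = np.random.randint(1, 255, (32, 32, 3)).astype(np.uint8)
unmixed = sparse_color_deconvolution(im_rgb, w_init, beta=0.5)
print(unmixed.Stains.shape, unmixed.W.shape)  # expected: (32, 32, 2) and (3, 2)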
Example #22
def factorization(V, list_of_arrays):
    snmf = nimfa.Snmf(V, seed="random_c", rank=4, max_iter=20, version='r', eta=1.)
    snmf_fit = snmf()
    globals()[list_of_arrays[0]] = snmf_fit.basis()
    globals()[list_of_arrays[1]] = snmf_fit.coef()
Example #23
H_min_mat = np.zeros([20, 2])
W_min_mat = np.zeros([20, 2])

for b in range(0, 2):
    for t in range((20)):
        k = t + 1
        print(k)
        np.random.seed(0)
        w_r = np.random.random((40320, k))
        h_r = np.random.random((k, 285))
        betaa = b / 10.0
        snmf = nimfa.Snmf(np.matrix(data),
                          rank=k,
                          beta=betaa,
                          max_iter=1000,
                          W=w_r,
                          H=h_r,
                          version='r',
                          eta=1.,
                          min_residuals=0.0001)
        snmf_fit = snmf()
        W = snmf_fit.fit.W
        H = snmf_fit.fit.H
        spsw[t, b], spsh[t, b] = snmf_fit.fit.sparseness()
        evals[t, b] = snmf_fit.fit.evar()
        kl[t, b] = snmf_fit.distance(metric='kl')
        bet[t, b] = calculate_error(D, W, H)
        rss[t, b] = snmf_fit.fit.rss()

        #W_min_mat[t,b] = np.matrix.min(W)
Example #24
def nmf_init(mat: np.ndarray, num_clusters: int):
    nmf = nimfa.Snmf(mat, rank=num_clusters)
    nmf_fit = nmf()
    return nmf_fit.basis().astype(bool).astype(float)
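A hypothetical call, assuming numpy and nimfa are imported; casting the basis through bool keeps the nonzero support of W as a 0/1 indicator matrix:

import numpy as np

mat = np.random.rand(50, 8)
indicators = nmf_init(mat, num_clusters=4)
print(indicators.shape)  # expected: (50, 4)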