Python Labelling.assignlabels примеры использования

Язык программирования: Python

Пространство имен/Пакет: labelling

Класс/Тип: Labelling

Метод/Функция: assignlabels

Примеров на hotexamples.com: 8

Python Labelling.assignlabels - 8 примеров найдено. Это лучшие примеры Python кода для labelling.Labelling.assignlabels, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

Labelling(6)

metadata(6)

assignlabels(5)

correlate(3)

grounded(2)

getlabels(1)

save_labels(1)

search_classes(1)

unify_label(1)

write_classes(1)

Пример #1

Показать файл

Файл: microbPlotter.py Проект: sperez8/microbPLSA

def topic_distribution(name = None, study = None, order = None, **options):
    '''Plot each sample's topic distribution P(z|d) as a stacked bar chart.

    Given a model p_z, p_w_z, p_d_z, the document's distribution is
    p(z|d) = normalized(p(d|z) * p(z)).

    Parameters:
        name, study, **options: forwarded unchanged to
            ``MicrobPLSA.open_model`` to locate and load the model.
        order: optional column index; when given, the columns of P(z|d)
            are reordered with it and it is also used as the x tick labels.

    Returns:
        The ``plt`` module, with the figure drawn on the current axes.
    '''

    m = microbplsa.MicrobPLSA()
    m.open_model(name = name, study = study, **options) #get model from the results file
    #return document's distribution
    p_z_d = m.model.document_topics()

    Z, N = p_z_d.shape #Z rows (one per topic), N columns (one per sample)
    if order is not None:
        p_z_d = p_z_d[:, order]
    n = np.arange(N)
    width = 25.0 / float(N) #scale width of bars by number of samples
    p = [] #list of plots
    colors = plt.cm.rainbow(np.linspace(0, 1, Z))

    Lab = Labelling(m, ignore_continuous = False)
    Lab.metadata(non_labels = ['BarcodeSequence'])
    R = Lab.correlate()
    labels_r = Lab.assignlabels(R, num_labels = 1)
    labels, r = zip(*labels_r)
    labels = [l.replace('(', '\n(') for l in labels]

    #sort and organize labels and topics so they are always plotted in the same order
    # NOTE(review): zipper is defined elsewhere; it is used here as if it
    # returns a sortable list of (label, topic_index) pairs -- confirm.
    labelsUnsorted = zipper(labels, range(0, Z))
    labelsUnsorted.sort()
    labels, Zrange = zip(*labelsUnsorted)
    Zrange = list(Zrange)

    p.append(plt.bar(n, p_z_d[Zrange[0], :], width, color=colors[0], linewidth = 0))
    # Copy the first row: a plain slice is a view, and accumulating into it
    # would overwrite the model's own P(z|d) values.
    height = p_z_d[Zrange[0], :].copy()
    for i, z in enumerate(Zrange[1:]):
        p.append(plt.bar(n, p_z_d[z, :], width, color=colors[i+1], bottom=height, linewidth = 0))
        height = height + p_z_d[z, :]

    plt.ylabel('Probability P(z|d)')
    plt.xlabel('Sample')
    plt.title('Sample\'s topic distribution')
    #plt.xticks(np.arange(0,width/2.0,N*width), ['S'+str(n) for n in range(1,N)])

    # Pair each sorted label with its own topic index. Looking the index up
    # with labels.index(l) returns the first match only, which mislabels
    # topics whenever two of them share the same label text.
    topiclegend = ['Topic' + str(z + 1) + ': ' + l + '\n (' + str(r[z]) + ')'
                   for l, z in zip(labels, Zrange)]
    fontP = FontProperties()
    if N > 60:
        fontP.set_size('xx-small')
    else:
        fontP.set_size('small')
    ax = plt.subplot(111)
    ratio = float(N) * 0.5
    ax.set_aspect(ratio)
    ax.tick_params(axis = 'x', colors='w') #remove tick labels by setting them the same color as background
    box = ax.get_position()
    ax.set_position([box.x0, box.y0, 0.5, box.height])

    if order is not None:
        plt.xticks(n, order, size = 'xx-small')
    # Use two legend columns once there are more than 12 topics.
    if Z > 12:
        columns = 2
    else:
        columns = 1
    plt.legend(p, topiclegend, prop = fontP, title = 'Topic Label', loc='center left', bbox_to_anchor=(1, 0.5), ncol = columns)
    return plt

Пример #2

Показать файл

Файл: pca.py Проект: sperez8/microbPLSA

def makePCA(datafile, num_components):
    '''Plot a PCA projection of the samples, colored by primary topic.

    Parameters:
        datafile: passed to ``MicrobPLSA.open_data`` to load the data
            matrix; its transpose is what PCA is fitted on.
        num_components: number of principal components to compute; points
            are drawn only when this is 2 or 3.

    Reads the module-level names ``resultfile``, ``study`` and
    ``MANUAL_LABELS``, which are defined outside this function.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    '''
    m = microbplsa.MicrobPLSA()
    # Use the argument; the previous code read an unrelated name `dataFile`
    # and silently ignored the parameter.
    m.open_data(datafile = datafile) #get data of OTU abundances per sample
    X = m.datamatrix.T

    plsa = m.open_model(modelFile = resultfile) #get model from the results file
    #return document's distribution
    p_d_z = plsa.p_d_z
    N, Z = p_d_z.shape

    #get topic labels
    if MANUAL_LABELS:
        labels = MANUAL_LABELS
    else:
        Lab = Labelling(study, Z, ignore_continuous = False)
        Lab.metadata(non_labels = ['BarcodeSequence'])
        R = Lab.correlate()
        labels_r = Lab.assignlabels(R, num_labels = 1)
        labels, r = zip(*labels_r)
        labels = [l.replace('(', '\n(') for l in labels]

    #get primary topic per sample (index of the largest entry in each row)
    topics = np.array([np.argmax(row) for row in p_d_z])

    pca = PCA(n_components=num_components, whiten = True)
    # A single fit is enough; the model was previously fitted twice.
    X_r = pca.fit(X).transform(X)

    # Percentage of variance explained for each components
    print('Explained variance ratio (first two components): %s'
          % str(pca.explained_variance_ratio_))

    #initiate plot and colors
    colors = plt.cm.rainbow(np.linspace(0, 1, Z))
    plt.figure(1, figsize=(4, 3))
    plt.clf()
    # Only request 3D axes when there is a third component to draw.
    if num_components == 3:
        ax = plt.subplot(111, projection ='3d')
    else:
        ax = plt.subplot(111)
    if num_components == 2:
        for c, i, l in zip(colors, range(0, Z), labels):
            ax.plot(X_r[topics == i, 0], X_r[topics == i, 1], 'o', color=c, label=l)
    elif num_components == 3:
        for c, i, l in zip(colors, range(0, Z), labels):
            ax.plot(X_r[topics == i, 0], X_r[topics == i, 1], X_r[topics == i, 2], 'o', color=c, label=l)
    fontP = FontProperties()
    # Use two legend columns once there are more than 12 topics.
    if Z > 12:
        columns = 2
    else:
        columns = 1
    # Narrow the axes so the legend anchored to their right edge fits.
    box = ax.get_position()
    ax.set_position([box.x0, box.y0, 0.5, box.height])
    plt.legend(prop = fontP, loc='center left', bbox_to_anchor=(1, 0.5), ncol = columns)
    # Z is the topic count taken from p_d_z; lowercase `z` is not bound here.
    plt.title('PCA of Study %s with Z=%s' %(study, str(Z)))

    plt.show()
    return None

Пример #3

Показать файл

Файл: run_assign_labels.py Проект: sperez8/microbPLSA

'''
Created on 22/01/2014

author: sperez8

Shows how to use Labelling class
'''

from labelling import Labelling

# Build a Labelling for study 1526 at every Z in 2..38 and save its labels.
study = '1526'
simple = False  # True: take labels straight from Lab.getlabels()
for Z in range(2,39):
    Lab = Labelling(study, Z, debug = False,ignore_continuous = False, adjusted_metadata = True)
    if simple:
        labels = Lab.getlabels()
    else:
        Lab.metadata(non_labels = ['BarcodeSequence'])
        # Map column 7 of the metadata matrix to integer codes and print them.
        m = list(Lab.metadatamatrix[:,7])
        transf = {'DRY':1, "SAFE":2, "DIPPING":3, "UNDER_ISH":4}
        M = [transf[n] for n in m]
        # Parenthesised single-argument print behaves the same on Python 2
        # and is valid syntax on Python 3.
        print(M)
        if 5 in M: print("DDJDJD        ")
        import sys
        # NOTE(review): this unconditional exit stops the script during the
        # first iteration whenever `simple` is False, so the correlate /
        # assignlabels / save_labels calls below never run on this path.
        # It looks like leftover debugging -- confirm before removing.
        sys.exit()
        R = Lab.correlate()
        labels = Lab.assignlabels(R,num_labels = 8)

    Lab.save_labels(labels)

Пример #4

Показать файл

# Paths of the model results file and of the OTU table for this study.
f = '/Users/sperez/git/microbPLSA/MicrobProcessor/Results/study_' + study + '_' + str(
    z) + '_topics_.txt'
datafile = '/Users/sperez/Documents/PLSAfun/EMPL data/study_' + study + '_split_library_seqs_and_mapping/study_' + study + '_closed_reference_otu_table.biom'

m = microbplsa.MicrobPLSA()
plsa = m.open_model(f)  #get model from the results file
p_z_d = plsa.document_topics()  #return document's distribution
Z, N = p_z_d.shape  #Z rows, N columns of the document-topic matrix

Lab = Labelling(study, Z, ignore_continuous=False,
                adjusted_metadata=True)  #get labels!
# NOTE(review): this rebinds `z`, which was used above to build the results
# file name; the name is kept because code outside this view may read it.
x, y, z = Lab.metadata(non_labels=[])
# Parenthesised single-argument print is valid on both Python 2 and 3.
print(y)
R = Lab.correlate()
labels_r = Lab.assignlabels(R, num_labels=1)
print(labels_r)
oldlabels, r = zip(*labels_r)
# Keep the labels whose correlation magnitude exceeds the threshold. The
# loop variable is not called `r`, so the tuple of correlations unpacked
# above is no longer overwritten by the last scalar.
goodlabels = [lab for lab, corr in labels_r
              if corr > CORRELATION_THRESHOLD or corr < -CORRELATION_THRESHOLD]
print("Only %i/%i passed the correlation threshold of %1.1f" %
      (len(goodlabels), len(oldlabels), CORRELATION_THRESHOLD))

# Rewrite label text one substitution at a time. Order matters: ' (' must be
# handled before the single-space and ')' rules.
# NOTE(review): `replace` is defined outside this view; it is called here
# exactly as before, as replace(text, old, new).
labels = list(oldlabels)
for old, new in ((' (', '_'), (' ', '_'), (')', ''),
                 (':', '_'), ('.', '_'), ('-', '_')):
    labels = [replace(l, old, new) for l in labels]

Пример #5

Показать файл

def makePCA(datafile, num_components):
    '''Plot a PCA projection of the samples, colored by primary topic.

    Parameters:
        datafile: passed to ``MicrobPLSA.open_data`` to load the data
            matrix; its transpose is what PCA is fitted on.
        num_components: number of principal components to compute; points
            are drawn only when this is 2 or 3.

    Reads the module-level names ``resultfile``, ``study`` and
    ``MANUAL_LABELS``, which are defined outside this function.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    '''
    m = microbplsa.MicrobPLSA()
    # Use the argument; the previous code read an unrelated name `dataFile`
    # and silently ignored the parameter.
    m.open_data(datafile=datafile)  #get data of OTU abundances per sample
    X = m.datamatrix.T

    plsa = m.open_model(modelFile=resultfile)  #get model from the results file
    #return document's distribution
    p_d_z = plsa.p_d_z
    N, Z = p_d_z.shape

    #get topic labels
    if MANUAL_LABELS:
        labels = MANUAL_LABELS
    else:
        Lab = Labelling(study, Z, ignore_continuous=False)
        Lab.metadata(non_labels=['BarcodeSequence'])
        R = Lab.correlate()
        labels_r = Lab.assignlabels(R, num_labels=1)
        labels, r = zip(*labels_r)
        labels = [l.replace('(', '\n(') for l in labels]

    #get primary topic per sample (index of the largest entry in each row)
    topics = np.array([np.argmax(row) for row in p_d_z])

    pca = PCA(n_components=num_components, whiten=True)
    # A single fit is enough; the model was previously fitted twice.
    X_r = pca.fit(X).transform(X)

    # Percentage of variance explained for each components
    print('Explained variance ratio (first two components): %s' %
          str(pca.explained_variance_ratio_))

    #initiate plot and colors
    colors = plt.cm.rainbow(np.linspace(0, 1, Z))
    plt.figure(1, figsize=(4, 3))
    plt.clf()
    # Only request 3D axes when there is a third component to draw.
    if num_components == 3:
        ax = plt.subplot(111, projection='3d')
    else:
        ax = plt.subplot(111)
    if num_components == 2:
        for c, i, l in zip(colors, range(0, Z), labels):
            ax.plot(X_r[topics == i, 0],
                    X_r[topics == i, 1],
                    'o',
                    color=c,
                    label=l)
    elif num_components == 3:
        for c, i, l in zip(colors, range(0, Z), labels):
            ax.plot(X_r[topics == i, 0],
                    X_r[topics == i, 1],
                    X_r[topics == i, 2],
                    'o',
                    color=c,
                    label=l)
    fontP = FontProperties()
    # Use two legend columns once there are more than 12 topics.
    if Z > 12:
        columns = 2
    else:
        columns = 1
    # Narrow the axes so the legend anchored to their right edge fits.
    box = ax.get_position()
    ax.set_position([box.x0, box.y0, 0.5, box.height])
    plt.legend(prop=fontP,
               loc='center left',
               bbox_to_anchor=(1, 0.5),
               ncol=columns)
    # Z is the topic count taken from p_d_z; lowercase `z` is not bound here.
    plt.title('PCA of Study %s with Z=%s' % (study, str(Z)))

    plt.show()
    return None

Пример #6

Показать файл

Файл: plot_topiclabel_scatter.py Проект: sperez8/microbPLSA

# Script fragment: load the labels and the fitted model for one study.
# `study` is defined outside this view.

# Passed to Labelling and embedded in the results file name below
# (presumably the number of topics -- confirm).
Z = 8
only_continuous = True  # not read anywhere in the visible part of the script

# Prefix and suffix of the results file path; Z is inserted between them.
f = '/Users/sperez/git/microbPLSA/MicrobProcessor/Results/study_'+study +'_'
end = '_topics_.txt'

# NOTE(review): despite the name, this is the path later given to open_model.
datafile = f+str(Z)+end

# NOTE(review): shadows the builtin format(); the script compares it to 'svg' later.
format = 'pdf'



Lab = Labelling(study, Z)
metatable, factor_types, factors = Lab.metadata()
R = Lab.correlate()
labels_r = Lab.assignlabels(R)
# Split the (label, r) pairs into two parallel tuples.
labels, r = zip(*labels_r)
# Start a new line before every '(' in a label.
labels = [l.replace('(','\n(') for l in labels]


m = microbplsa.MicrobPLSA()
plsa = m.open_model(datafile) #get model from the results file

#return document's distribution
p_z_d = plsa.document_topics()

# Column 1 of the metadata table as a plain list.
colorlabel = list(metatable[:,1])
    
    
if format == 'svg':
    import matplotlib

Пример #7

Показать файл

Файл: make_parallel_coords_data.py Проект: sperez8/microbPLSA

# Labels whose correlation magnitude is not above this value are filtered out.
CORRELATION_THRESHOLD = 0.0
pcoordfile = _root_dir +'/D3/pcplots/topics.js'

# Paths of the model results file and of the OTU table for this study.
f = '/Users/sperez/git/microbPLSA/MicrobProcessor/Results/study_'+study +'_'+str(z)+'_topics_.txt'
datafile = '/Users/sperez/Documents/PLSAfun/EMPL data/study_'+study+'_split_library_seqs_and_mapping/study_'+study+'_closed_reference_otu_table.biom'

m = microbplsa.MicrobPLSA()
plsa = m.open_model(f) #get model from the results file
p_z_d = plsa.document_topics() #return document's distribution
Z,N =p_z_d.shape #Z rows, N columns of the document-topic matrix

Lab = Labelling(study, Z, ignore_continuous = False, adjusted_metadata = True) #get labels!
# NOTE(review): this rebinds `z`, which was used above to build the results
# file name; the name is kept because code outside this view may read it.
x,y,z = Lab.metadata(non_labels = [])
# Parenthesised single-argument print is valid on both Python 2 and 3.
print(y)
R = Lab.correlate()
labels_r = Lab.assignlabels(R,num_labels = 1)
print(labels_r)
oldlabels, r = zip(*labels_r)
# Keep the labels whose correlation magnitude exceeds the threshold. The
# loop variable is not called `r`, so the tuple of correlations unpacked
# above is no longer overwritten by the last scalar.
goodlabels = [lab for lab, corr in labels_r
              if corr > CORRELATION_THRESHOLD or corr < -CORRELATION_THRESHOLD]
print ("Only %i/%i passed the correlation threshold of %1.1f"%(len(goodlabels), len(oldlabels), CORRELATION_THRESHOLD))

# Rewrite label text one substitution at a time. Order matters: ' (' must be
# handled before the single-space and ')' rules.
# NOTE(review): `replace` is defined outside this view; it is called here
# exactly as before, as replace(text, old, new).
labels = list(oldlabels)
for old, new in ((' (', '_'), (' ', '_'), (')', ''),
                 (':', '_'), ('.', '_'), ('-', '_')):
    labels = [replace(l, old, new) for l in labels]
# Column 0 of the metadata matrix.
samplenames = Lab.metadatamatrix[:,0]

Пример #8

Показать файл

# Script fragment: load the labels and the fitted model for one study.
study = '1526'
# Passed to Labelling and embedded in the results file name below
# (presumably the number of topics -- confirm).
Z = 8
only_continuous = True  # not read anywhere in the visible part of the script

# Prefix and suffix of the results file path; Z is inserted between them.
f = '/Users/sperez/git/microbPLSA/MicrobProcessor/Results/study_' + study + '_'
end = '_topics_.txt'

# NOTE(review): despite the name, this is the path later given to open_model.
datafile = f + str(Z) + end

# NOTE(review): shadows the builtin format(); the script compares it to 'svg' later.
format = 'pdf'

Lab = Labelling(study, Z)
metatable, factor_types, factors = Lab.metadata()
R = Lab.correlate()
labels_r = Lab.assignlabels(R)
# Split the (label, r) pairs into two parallel tuples.
labels, r = zip(*labels_r)
# Start a new line before every '(' in a label.
labels = [l.replace('(', '\n(') for l in labels]

m = microbplsa.MicrobPLSA()
plsa = m.open_model(datafile)  #get model from the results file

#return document's distribution
p_z_d = plsa.document_topics()

# Column 1 of the metadata table as a plain list.
colorlabel = list(metatable[:, 1])

if format == 'svg':
    import matplotlib
    matplotlib.use('SVG')
else: