Exemplos de ContactMap em Python, exemplos de contact_map.ContactMap em Python

Exemplo n.º 1

0

Exibir arquivo

def gene_bin(para, data='YeastSyn'):
    """Report how many annotated genes fall into each bin of a heatmap.

    Loads the contact map named ``data``, reads the gene table from
    ``para['DataPath'] + '/SGD/SGD_features.tab'``, maps every gene start to
    a bin through ``ContactMap.choose_map_loc`` and then shows, for each bin,
    its chromosome, start, end and gene count.

    Genes whose coordinates cannot be converted or located are reported with
    a "Skip" line and ignored.
    """
    show("""
    Check gene distributions among the bins in heatmap
    """, True)
    map1 = ContactMap()
    assert map1.load(data, False)
    GE = read_gene(para['DataPath']+'/SGD/SGD_features.tab')
    bin_idx = []
    for gid in GE:
        ch, st, _ = GE[gid]  # the third field (gene end) is not needed here
        try:
            idx = map1.choose_map_loc([int(ch)], [int(st)-1])
            for i in idx:
                if i >= 0:  # negative indices are not counted
                    bin_idx.append(i)
        except Exception:
            # Best effort: skip genes that cannot be placed, but let
            # KeyboardInterrupt / SystemExit propagate (a bare ``except:``
            # would swallow them too).
            print('Skip %s %s %s' % (ch, st, gid))
    n = map1.contact_map.shape[0]
    cout = histogram(bin_idx, range(n), False)
    for i in xrange(n):
        show(map1.idx2chr[map1.frag_chr[i]])
        show(map1.frag_sta[i])
        show(map1.frag_end[i])
        show(cout[i])
        show()

Exemplo n.º 2

0

Exibir arquivo

def run1(name='demo'):
    """Track how the decomposition evolves as the iteration cap grows.

    A reference solution is computed first; then, for each iteration cap, a
    fresh copy of the map is decomposed and every reference column is matched
    to its best-correlated dimension. The number of distinct matches, the
    mean/STD of the matched scores and the Gini impurity of the diagonal of
    ``group_map`` are shown for each cap.
    """
    show('''
    Compare decomposation results during iterations
    ''', True)
    base = ContactMap(name)
    assert base.load()
    base.decompose_auto()
    rows, rank = base.contact_group.shape
    print('%s %s' % (rows, rank))
    base.decompose('NND', dim_num=rank, A=base.get_null_map())

    # Reference solution that every capped run is compared against.
    converged = base.duplicate()
    converged.decompose(dim_num=rank)
    reference = np.array(converged.contact_group)

    show('Iter\tObj.\t# out of %s' % rank)
    show('Corr. Mean\tCorr. STD\tMetric\n')
    from contact_map import gini_impurity
    iteration_caps = [1, 5, 10, 50] + range(100, 1201, 100)
    for cap in iteration_caps:
        show(cap)
        trial = base.duplicate()
        objective = trial.decompose(dim_num=rank, max_iter=cap, stop_thrd=0)
        show(objective)
        # Match each reference column to its top-ranked dimension.
        matched_dims = []
        matched_vals = []
        for col in xrange(rank):
            order, scores, _ = trial.test_enrichment(reference[:, col], 'PCC')
            top = order[0]
            matched_dims.append(top)
            matched_vals.append(scores[top])
        show(len(set(matched_dims)))
        show(mean_std(matched_vals))
        show(gini_impurity(np.diag(trial.group_map)))
        show()

Exemplo n.º 3

0

Exibir arquivo

Arquivo: gene_set.py Projeto: huxihao/BNMF

def bin_count(infile, pdf):
    """Test the enrichment of the locations listed in ``infile`` and delete it.

    Does nothing when ``infile`` does not exist. Otherwise the 'YeastHiC' map
    is loaded, the locations in ``infile`` are mapped to bins and tested with
    the 'AUC' method (plots go to ``pdf``), and summary values are shown. For
    files whose name contains 'telomere', 'tRNA' or 'paralogs' the membership
    of every location in the significant groups (p < 0.01) is also written to
    ``infile + '_val.csv'``. ``infile`` is removed at the end.
    """
    if not os.path.exists(infile):
        return
    from contact_map import ContactMap
    map1 = ContactMap()
    map1.load('YeastHiC')
    idx, name = map1.get_locations(infile)
    srt, val, pval = map1.test_enrichment(idx, 'AUC', title=infile, plot=pdf, pages=9)
    show(infile)
    show(len(idx))
    show(len(set(idx)))
    show(pval[srt[0]])
    sign = [i for i in srt if pval[i] < 0.01]
    show(sign)
    show()
    if any(tag in infile for tag in ('telomere', 'tRNA', 'paralogs')):
        # ``with`` guarantees the CSV is flushed and closed even if a write
        # or a lookup raises part-way through.
        with open(infile+'_val.csv', 'w') as outfile:
            outfile.write('Name,Bin Idx,Membership\n')
            for Name, Idx in zip(name, idx):
                outfile.write('%s,%s'%(Name, Idx))
                for i in sign:
                    outfile.write(',%s'%map1.contact_group[Idx,i])
                outfile.write('\n')
    os.remove(infile)

Exemplo n.º 4

0

Exibir arquivo

 def __init__(self, exepath, name='DataName', enzyme='Enzyme', sparse=False):
     """Initialise the map and remember the path of the external executable.

     ``name``, ``enzyme`` and ``sparse`` are forwarded to
     ``ContactMap.__init__``. ``exepath`` is made absolute; when nothing
     exists at that path an installation hint is printed and the process
     terminates with a non-zero status.
     """
     ContactMap.__init__(self, name, enzyme, sparse)
     path = os.path.abspath(exepath)
     if not os.path.exists(path):
         print('Please install domaincall to %s' % path)
         print('The project is at https://github.com/kingsfordgroup/armatus')
         # A missing executable is a failure: exit with status 1 rather than
         # 0, and avoid the ``exit`` helper that only the ``site`` module
         # provides.
         raise SystemExit(1)
     self.exepath = path

Exemplo n.º 5

0

Exibir arquivo

def one_cell(path, pdf, cell, genome):
    """Run ``one_chr`` on every chromosome of ``genome`` for one cell type.

    Chromosomes are read from ``path + '/<genome>_chr_len.txt'`` and visited
    in ascending order of their index; the per-chromosome results are
    discarded.
    """
    map1 = ContactMap()
    length_file = path + '/%s_chr_len.txt' % genome
    map1.genome_info(length_file)
    for key in sorted(map1.idx2chr.keys()):
        chrom = map1.idx2chr[key]
        one_chr(path=path, cell=cell, genome=genome, ch=chrom, pdf=pdf)

Exemplo n.º 6

0

Exibir arquivo

def run4(name='demo'):
    """Show decomposition results across bin sizes and iteration caps.

    First part: the map is re-binned at 1x, 2x and 3x its bin size and
    ``decompose_auto`` is run for each candidate dimension. Second part: the
    original map is decomposed with iteration caps 0, 100, ..., 900. The
    second element of each pair returned by ``decompose_auto`` is shown.
    """
    banner = '''
    Compare the change of clusters under different resolutions and iterations
    '''
    show(banner, True)
    base = ContactMap(name)
    assert base.load()
    ranks = [10, 15, 20, 25, 30, 35, 40, 45, 50]
    base.decompose_auto(dim_num=ranks)
    # Result unused; the product is still evaluated as in the original flow.
    _memb = np.array(base.contact_group * base.group_map)
    show('')
    show(ranks, True)
    for factor in xrange(1, 4):
        coarse = base.duplicate()
        coarse.get_interactions()
        coarse.create_binnedmap(binsize=base.get_binsize() * factor)
        coarse.mask_diag()
        fitted = coarse.decompose_auto(dim_num=ranks)
        _bins, objectives = zip(*fitted)
        # The outputs below are not used further, but the calls are kept.
        _idx, _val = base.get_locations(coarse.output_groups(),
                                        st=0, ch=0, po=1)
        _newv = np.array(coarse.contact_group * coarse.group_map)
        show(coarse.get_binsize())
        show(objectives, True)

    show(base.contact_group.shape, True)
    show('\n# of Iter.')
    show(ranks, True)
    for cap in xrange(0, 1000, 100):
        fitted = base.decompose_auto(max_iter=cap, update=True, dim_num=ranks)
        _bins, objectives = zip(*fitted)
        show(cap)
        show(objectives, True)

Exemplo n.º 7

0

Exibir arquivo

def bin_count(infile, pdf):
    """Test the enrichment of the locations listed in ``infile`` and delete it.

    Does nothing when ``infile`` does not exist. Otherwise the 'YeastHiC' map
    is loaded, the locations in ``infile`` are mapped to bins and tested with
    the 'AUC' method (plots go to ``pdf``), and summary values are shown. For
    files whose name contains 'telomere', 'tRNA' or 'paralogs' the membership
    of every location in the significant groups (p < 0.01) is also written to
    ``infile + '_val.csv'``. ``infile`` is removed at the end.
    """
    if not os.path.exists(infile):
        return
    from contact_map import ContactMap
    map1 = ContactMap()
    map1.load('YeastHiC')
    idx, name = map1.get_locations(infile)
    srt, val, pval = map1.test_enrichment(idx,
                                          'AUC',
                                          title=infile,
                                          plot=pdf,
                                          pages=9)
    show(infile)
    show(len(idx))
    show(len(set(idx)))
    show(pval[srt[0]])
    sign = [i for i in srt if pval[i] < 0.01]
    show(sign)
    show()
    if any(tag in infile for tag in ('telomere', 'tRNA', 'paralogs')):
        # ``with`` guarantees the CSV is flushed and closed even if a write
        # or a lookup raises part-way through.
        with open(infile + '_val.csv', 'w') as outfile:
            outfile.write('Name,Bin Idx,Membership\n')
            for Name, Idx in zip(name, idx):
                outfile.write('%s,%s' % (Name, Idx))
                for i in sign:
                    outfile.write(',%s' % map1.contact_group[Idx, i])
                outfile.write('\n')
    os.remove(infile)

Exemplo n.º 8

0

Exibir arquivo

def run2(name='demo'):
    """Show how the objective value is distributed over random restarts.

    The map is decomposed 100 times from a reset solution (50 iterations
    each); a histogram of the objectives and their mean/STD are shown. The
    objective reached after a 'Null' initialisation is then shown together
    with its Z-score against that distribution.
    """
    show('''
    Show the distribution of objective values
    ''', True)
    map1 = ContactMap(name)
    assert map1.load()
    map1.decompose_auto(max_iter=50)
    n, r = map1.contact_group.shape
    map2 = map1.duplicate()
    objs = []
    for _ in xrange(100):
        map2.reset_solution()
        objs.append(map2.decompose(dim_num=r, max_iter=50))
    max_obj = int(max(objs)) + 1
    min_obj = int(min(objs)) - 1
    # About ten bins; the step is clamped to 1 because a spread below 10
    # would give a zero step, which range() rejects with ValueError.
    step = max(1, (max_obj - min_obj) // 10)
    bins = range(min_obj, max_obj, step)
    show('')
    show(bins, True)
    show('Frequency')
    show(histogram(objs, bins), True)
    show('Mean\tSTD\n')
    m, s = mean_std(objs)
    show([m, s], True)

    map2.decompose('Null', dim_num=r)
    obj1 = map2.decompose(dim_num=r)
    show('Use Null init has obj.')
    show(obj1)
    show('and Z-score is')
    show((obj1 - m) / s, True)

Exemplo n.º 9

0

Exibir arquivo

def map_contacts(fname):
    """Plot the residue contact map of the first frame of a trajectory.

    The figure is saved next to ``fname`` with a ``.png`` extension and then
    displayed. Returns the ``ContactMap`` built from the first frame.
    """
    first_frame = md.load(fname)[0]
    contacts = ContactMap(first_frame)
    plot_options = dict(cmap='seismic', vmin=-1, vmax=1)
    (fig, ax) = contacts.residue_contacts.plot(**plot_options)
    plt.xlabel("Residue")
    plt.ylabel("Residue")

    stem, _ext = os.path.splitext(fname)
    plt.savefig(stem + '.png', bbox_inches='tight')
    plt.show()

    return contacts

Exemplo n.º 10

0

Exibir arquivo

def go_test(para, data='YeastHiC'):
    """Test, for every GO term, whether its genes are enriched in any group.

    GO-to-gene mappings (slim mapping plus protein complexes) are read from
    ``para['DataPath'] + '/SGD'``. For each term the gene locations are saved
    to a temporary file, mapped onto the contact map ``data`` and tested with
    the 'AvgCCD' method; terms with at least one group at p < 0.01 are shown.
    """
    show("""
    Check gene groups relating to the same GO term
    """, True)
    sgd_dir = para['DataPath'] + '/SGD/'
    MP = read_map(sgd_dir + 'go_slim_mapping.tab')
    CO = read_complex(sgd_dir + 'go_protein_complex_slim.tab')
    GO = read_go(sgd_dir + 'go_terms.tab')
    GE = read_gene(sgd_dir + 'SGD_features.tab')
    go2gene = MP.copy()
    go2gene.update(CO)

    map1 = ContactMap()
    assert map1.load(data)
    map1.output_groups()
    tested = 0
    for raw_go in go2gene:
        go = raw_go.strip()
        fname = save_gogene(go, go2gene, GE)
        idx, _names = map1.get_locations(fname, st=1, ch=1, po=2, nm=0)
        os.remove(fname)
        if len(idx) == 0:
            continue
        srt, _val, pval = map1.test_enrichment(idx, method='AvgCCD')
        tested += 1
        # Significant groups, kept in the order given by ``srt``.
        sign = [i for i in srt if pval[i] < 0.01]
        if not sign:
            continue
        show(go)
        show(len(idx))
        show(GO[int(go.split(':')[1])])
        show(pval[sign[0]])
        show(pval[sign[-1]])
        show(sign)
        show()
    show('We tested %s GO terms for %s.\n'%(tested,data))

Exemplo n.º 11

0

Exibir arquivo

def chr_detail(path, cell, genome, ch, loci, st=0, ed=None, pdf=None):
    """Decompose one chromosome region and plot cluster memberships in detail.

    The map is cached under the name 'tad-detail-<ch>-in-<cell>'; when it
    cannot be loaded it is rebuilt from ``path/<cell>/nij/nij.<ch>`` with
    40e3 bins, restricted to [st, ed) and saved. The map is then masked,
    decomposed and, when ``pdf`` is given, plotted page by page.

    ``loci`` (may be None) are positions passed to ``choose_map_loc``; the
    groups with non-zero weight at those bins are the ones drawn.

    NOTE(review): relies on a module-level name ``domain`` (the TAD file name)
    that is not defined in this block -- confirm it exists in the module.

    Returns the decomposed ``ContactMap``.
    """
    map1 = ContactMap('tad-detail-%s-in-%s'%(ch,cell))
#    map1.clear()
    if not map1.load():
        # No cached map: rebuild it from the raw matrix file.
        map1.genome_info(path+'/%s_chr_len.txt'%genome)
        map1.focus_chromosome(ch)
        map1.create_binnedmap(40e3, lazy=True)
#        map1.contact_map = np.asmatrix(np.loadtxt(path+'/'+cell+'/uij.'+ch))
        map1.contact_map = np.asmatrix(np.loadtxt(path+'/'+cell+'/nij/nij.'+ch))
        print cell, ch, map1.frag_sta.shape[0], map1.contact_map.shape[0]
        # The loaded matrix must have one row per bin.
        assert map1.frag_sta.shape[0] == map1.contact_map.shape[0]
        map1.get_sparse_interactions()
        map1.focus_chromosome(ch, st=st, ed=ed)
        map1.create_contactmap(throw=0)
        map1.save()
    show(cell)
    show(ch)
    show(map1.contact_map.shape)
    map1.mask_diag()
    map1.mask_short()
    map1.decompose_auto(par_lam=1, beta=3, update=False)
    map1.sort_groups()
#    map1.add_bias_back()
    show(map1.contact_group.shape)
    show()
    if pdf is not None:
        # One PDF page each: raw map, reconstructed map, sub-maps.
        map1.plot_map()
        pdf.savefig(); plt.clf()
        map1.plot_map(map1.contact_group*map1.group_map*map1.contact_group.T, vmin=0.01, title='H*S*H.T')
        pdf.savefig(); plt.clf()
        map1.plot_submap()
        pdf.savefig(); plt.clf()
    # TAD start/end bins read from the same file (columns 1 and 2).
    TAD_st, _ = map1.get_locations(path+'/'+cell+'/'+domain, st=0, ch=0, po=1, add=0)
    TAD_ed, _ = map1.get_locations(path+'/'+cell+'/'+domain, st=0, ch=0, po=2, add=-1)
    TAD = zip(TAD_st, TAD_ed)

    # Per-bin group weights, scaled by the mean row sum of non-empty rows.
    W = np.asarray(map1.contact_group * map1.group_map)
    Wsum = W.sum(1)
    W /= Wsum[Wsum>0].mean()
    # Gini impurity per bin; only the commented-out plot below reads it.
    gini = 1-np.power(W,2).sum(1)
    gini[Wsum==0] = 0

    if loci is not None:
        loc = map1.choose_map_loc(loci)
    else:
        loc = []
    # Boolean mask of the groups that carry weight at the selected loci.
    grps = W[loc,:].sum(0) > 0
    map1.output_groups()
    show(loc, True)
    # Only the first 40 bins are drawn in the detail plots.
    sel = np.arange(0, 40)
#    if pdf is not None:
#        plt.plot(sel, gini[sel], 'k.--')
    for i in xrange(W.shape[1]):
        if grps[i]:
            if pdf is not None:
                plt.plot(sel, W[sel,i], label='C%s'%i)
    # TAD lengths are collected here but not used afterwards in this block.
    tad = []
    for i,j in TAD:
        tad.append(j-i)
        if i in sel and j in sel:
            if pdf is not None:
                # Draw each TAD that lies within the selection as a bar.
                plt.plot([i,j], [1.1,1.1], 'k-', linewidth=2)
    if pdf is not None:
        plt.plot(loc, [1]*len(loc), 'r.')
        # Roughly five ticks; relies on Python 2 integer division.
        xt = sel[::(len(sel)/5)]
        # presumably 0.04 Mb per bin, matching the 40e3 bin size -- TODO confirm
        plt.xticks(xt, ['%sM'%(X*0.04+st*1e-6) for X in xt])
        plt.ylim([0,1.2])
        plt.xlim([sel.min(), sel.max()])
        pdf.savefig(); plt.clf()
    if pdf is not None:
        # Observed and reconstructed maps restricted to the selected bins.
        map1.plot_map(map1.contact_map[sel,:][:,sel])
        pdf.savefig(); plt.clf()
        map1.plot_map(map1.contact_group[sel,:]*\
                      map1.group_map*\
                      map1.contact_group[sel,:].T,
                      vmin=0.01, title='H*S*H.T')
        pdf.savefig(); plt.clf()
    return map1

Exemplo n.º 12

0

Exibir arquivo

def run1(para):
    """Decompose the Duan2010N heatmap and write the first three clusters as PDB.

    For each of the first three groups, the bins whose membership exceeds 1
    are collected, translated to indices of the 3D model through the
    positions file, and passed to ``output_pdb`` as the marked indices.
    """
    show('''
    Decompose headmap and show clusters in PDB format
    ''', True)
    path = para['DataPath']+'/Duan2010N'
    map1 = ContactMap('PDBMAP')
    map1.genome_info(path+'/restriction_fragments_mappable_HindIII.txt', i2=3, i3=0)
    interaction_files = ('/interactions_HindIII_fdr0.01_inter.txt',
                         '/interactions_HindIII_fdr0.01_intra.txt')
    for rel_name in interaction_files:
        map1.add_interactions(path+rel_name)
    map1.create_binnedmap(10000)
    map1.decompose_auto(dims=range(5,51,5))
    positions_file = path+'/3d_model_of_yeast_with_genomic_positions.txt'
    map_idx, pdb_idx = map1.get_locations(positions_file, st=1, ch=0, po=1, nm=-1)
    H = map1.contact_group
    n_bins, _rank = H.shape
    model_file = path+'/3d_model_of_yeast_genome.pdb'
    for cluster in xrange(3):
        members = set(row for row in xrange(n_bins) if H[row,cluster] > 1)
        marked = [p for m, p in zip(map_idx, pdb_idx) if m in members]
        output_pdb('Yeast3D-C%s.pdb'%cluster, model_file, marked)

Exemplo n.º 13

0

Exibir arquivo


import pandas as pd
import matplotlib.pyplot as plt
import mdtraj as md
from contact_map import ContactMap, ContactFrequency, ContactDifference

# Script: plot the contacts between the toxin and the channel segments in the
# first frame of a PDB pose, then limit the axes to the residues involved.

# Path of the pose without its '.pdb' extension.
pdb = 'poses/snx_chanel'
traj = md.load_pdb(pdb + '.pdb')
print(traj)
topology = traj.topology

# Atom selections by segment name: the toxin (query) and the four segments
# R1-R4 (haystack).
tox = topology.select("segname TOX")
cav = topology.select("segname R1 R2 R3 R4")

# Contacts of the first frame only, between the two selections, with
# cutoff=0.35.
frame_contacts = ContactMap(traj[0], query=tox, haystack=cav, cutoff=0.35)
#print (frame_contacts.residue_contacts.df)
df = frame_contacts.residue_contacts.df

(fig, ax) = frame_contacts.residue_contacts.plot(cmap='seismic',
                                                 vmin=-1,
                                                 vmax=1)

# NOTE(review): the residue_for_atoms_* helpers are not defined in this
# snippet; presumably they map atom indices to residue ids, names and
# original numbering -- confirm against their definitions.
tox_residues_id = residue_for_atoms_id(tox, traj.topology)
cav_residues_id = residue_for_atoms_id(cav, traj.topology)
tox_residues = residue_for_atoms_name(tox_residues_id, traj.topology)
cav_residues = residue_for_atoms_name(cav_residues_id, traj.topology)
cav_residues_ori = residue_for_atoms_original(cav_residues_id, traj.topology)
# Restrict the plot to the id range of each selection (x: cav, y: tox).
ax.set_xlim(min(cav_residues_id), max(cav_residues_id) + 1)
ax.set_ylim(min(tox_residues_id), max(tox_residues_id) + 1)
#segment_for_residue(topology)

Exemplo n.º 14

0

Exibir arquivo

def decompose_dist(pdf, curve, r=None):
    """Decompose a synthetic heatmap derived from distances along ``curve``.

    The pairwise Euclidean distances between the vertices of ``curve`` are
    turned into a heatmap ``V = ds.max() * (ds + 1) ** -2``, which is then
    decomposed by eigen-decomposition, by NMF (initialised from the
    eigen-decomposition) and, when scikit-learn is installed, by K-means.
    Every intermediate matrix is plotted to a new page of ``pdf``.

    ``r`` is the number of dimensions. When it is None, ``choose_size`` is
    run and its result shown, but 4 is used regardless.
    """
    from contact_map import ContactMap, EIG, NMF_main
    map1 = ContactMap()
    curve_show(curve)
    pdf.savefig()
    plt.clf()
    verts = np.array(curve)
    map1.plot_map(verts, title="Verteces", log=False)
    pdf.savefig()
    plt.clf()
    from scipy.spatial.distance import pdist, squareform
    ds = squareform(pdist(verts, 'euclidean'))

    ## transform: inverse-square decay of the heat with distance
    V = ds.max() * ((ds + 1)**-2)
    map1.plot_map(V, title="Synthetic Heatmap", log=True)
    pdf.savefig()
    plt.clf()

    plt.loglog([(i + 1.0) / V.shape[0] for i in range(V.shape[0])],
               trace_sum(V),
               linestyle='-.')
    plt.title('Distribution of interactions along 1D')
    plt.xlabel('Ratio of linked locations to the total length')
    plt.ylabel('Number of observed links')
    pdf.savefig()
    plt.clf()

    # Identity test: ``== None`` would be evaluated element-wise if an array
    # were ever passed for r.
    if r is None:
        r = choose_size(pdf, V, 9)
        show('Best number of dimentions is %s\n' % r)
        # The estimate is only reported; the original code fixes r to 4.
        r = 4
    if False:  ## try PCA
        U = (V - np.mean(V.T, axis=1)).T
        Q, M = EIG(np.cov(U), r)
    else:
        Q, M = EIG(V, r)
    map1.plot_map(Q, title='Eig. Decomp. - Q Matrix', log=False)
    pdf.savefig()
    plt.clf()
    map1.plot_map(M, title='Eig. Decomp. - M Matrix', log=False)
    pdf.savefig()
    plt.clf()
    map1.plot_map(Q * M * Q.T, title='Eig. Decomp. - Recovered', log=False)
    pdf.savefig()
    plt.clf()
    sep_map_show(pdf, verts, Q)

    H, S, _obj = NMF_main(V, J='NMF-PoissonManifoldEqual', H=Q, S=M, r=r)
    map1.plot_map(H * S * H.T, title='NMF Decomp. - Recovered', log=False)
    pdf.savefig()
    plt.clf()
    map1.plot_map(H, title='NMF Decomp. - H Matrix', log=False)
    pdf.savefig()
    plt.clf()
    map1.plot_map(S, title='NMF Decomp. - S Matrix', log=False)
    pdf.savefig()
    plt.clf()
    # Order the columns by the position of their maximum before plotting.
    maxp = np.argmax(np.asarray(H), 0)
    srt = np.argsort(maxp)
    sep_map_show(pdf, verts, H[:, srt])

    # K-means is optional. Only a missing scikit-learn is tolerated; any
    # other failure is a real error and must not be reported as a missing
    # package.
    try:
        from sklearn.cluster import KMeans
    except ImportError:
        print('Please install SK-kit to run K-means')
        return
    km = KMeans(n_clusters=r)
    H = -np.matrix(km.fit_transform(V))
    maxp = np.argmax(np.asarray(H), 0)
    srt = np.argsort(maxp)
    map1.plot_map(H, title='K-means Decomp. - H Matrix', log=False)
    pdf.savefig()
    plt.clf()
    sep_map_show(pdf, verts, H[:, srt])

Exemplo n.º 15

0

Exibir arquivo

def get_syn_map(para, bin_size=3200, with_bias=True):
    """Build a synthetic yeast contact map from pre-computed link counts.

    Link counts are loaded from 'syn_link.npy' (optionally multiplied by a
    seeded random per-bin bias and floored), written as an interaction list
    to 'syn_yeast_map_bin<bin_size>[bias].txt', re-binned at ``bin_size`` and
    decomposed with 'NND'. Two heatmaps are saved to
    ``para['ExeFile'] + 'plot1.pdf'``.

    Returns ``(map1, dist)``: the decomposed map and the sub-matrix of
    'syn_dist.npy' selected by the located group bins.
    """
    pdf = PdfPages(para['ExeFile'] + 'plot1.pdf')
    ## prepare
    map1 = ContactMap('Syn3D')
    map1.genome_info(para['DataPath'] + '/Tjong2012GR/yeast_chr_len-Tjong.txt')
    map1.create_binnedmap(3200)  ## fixed
    map2 = map1.duplicate()

    ## obtain links from PDB
    link_map = np.load('syn_link.npy')
    if with_bias:  ## add random bias
        np.random.seed(0)  # fixed seed keeps the bias reproducible
        bias = np.random.random(link_map.shape[0])
        link_map *= np.outer(bias, bias)
        print('%s %s %s' % (link_map.min(), link_map.max(), link_map.mean()))
        link_map = np.floor(link_map)  ## sampling bias
    map1.contact_map = np.matrix(link_map, dtype='float')

    out_name = 'syn_yeast_map_bin%s%s.txt' % (bin_size,
                                             'bias' if with_bias else '')
    ch = map1.frag_chr
    po = (map1.frag_sta + map1.frag_end) / 2
    # ``with`` closes (and flushes) the file even if a write fails.
    with open(out_name, 'w') as output:
        for i in xrange(link_map.shape[0]):
            for j in xrange(link_map.shape[1]):
                if link_map[i, j] > 0:
                    output.write('%s\t%s\t%s\t%s\t0\t%s\t1e-10\t1e-10\n' %
                                 (ch[i], po[i], ch[j], po[j], link_map[i, j]))
            output.write('\n')

    map1.get_interactions()
    map1.create_binnedmap(bin_size)
    map1.mask_diag()
    map1.plot_map(title='Heatmap for the number of links')
    pdf.savefig()
    plt.clf()
    map1.decompose('NND')
    idx, names = map2.get_locations(map1.output_groups(),
                                    st=0,
                                    ch=0,
                                    po=1,
                                    nm=0,
                                    add=0)
    dist_map = np.load('syn_dist.npy')
    dist = dist_map[idx, :][:, idx]
    map1.plot_map(dist, title='Heatmap for the average distances', log=False)
    pdf.savefig()
    plt.clf()
    pdf.close()
    return map1, dist

Exemplo n.º 16

0

Exibir arquivo

Arquivo: contact_map.py Projeto: AspirinCode/MD-analysis-tools-scripts

import matplotlib.pyplot as plt
import mdtraj as md
from contact_map import ContactMap

# Structures to compare; the difference below is second minus first.
pdb_list = ["../pdb_dir_1_500ns/frame0.pdb",
            "../pdb_dir_5001_6000ns/frame4164.pdb"]

# Contact map of every structure, in the same order as pdb_list. The loop
# must keep them: the difference step reads contacts[0] and contacts[1], and
# the name was previously never defined (NameError).
contacts = []

# Program takes about several minutes to finish
# It is a bit slow;
for i, pdb_path in enumerate(pdb_list):
    pdb = md.load_pdb(pdb_path)
    frame_contacts = ContactMap(pdb[0], cutoff=1.5)
    contacts.append(frame_contacts)
    (fig, ax) = frame_contacts.residue_contacts.plot(cmap='seismic', vmin=-1, vmax=1)
    plt.xlabel("Residue")
    plt.ylabel("Residue")
    fig.savefig(f'cont-map-{i}.pdf', format='pdf', dpi=500)
    plt.close()

# Calculate the difference between two contact maps
diff = contacts[1] - contacts[0]
(fig, ax) = diff.residue_contacts.plot(cmap='seismic', vmin=-1, vmax=1)
plt.xlabel("Residue")
plt.ylabel("Residue")
fig.savefig('cont-map-diff.pdf', format='pdf', dpi=500)
plt.close()

Exemplo n.º 17

0

Exibir arquivo

def one_region(path, cell, genome, ch, bi, loci, st=0, ed=None, pdf=None):
    """Decompose one intrachromosomal region and export its cluster tracks.

    ``bi`` is the resolution label used in the input file names and must end
    in 'kb' or 'mb' (e.g. '5kb'); anything else raises ValueError. The map
    'loop-<ch>-in-<cell>' is cleared and, when it cannot be loaded, rebuilt
    from the ``.KRnorm`` / ``.RAWobserved`` files below ``path/<cell>``,
    keeping only contacts with both ends inside [st, ed). Each raw count is
    divided by the product of the norm values of its two bins.

    After decomposition, one wiggle track per locus in ``loci`` (the group
    with the largest weight at that locus) is written to
    'loop-<ch>-in-<cell>_groups.wig'. When ``pdf`` is given the maps and the
    membership profiles are plotted as well. Returns None.
    """
    if bi.endswith('kb'):
        reso = int(bi.replace('kb', '')) * 1000
    elif bi.endswith('mb'):
        reso = int(bi.replace('mb', '')) * 1000000
    else:
        raise ValueError('Unknow unit %s' % bi)
    map1 = ContactMap('loop-%s-in-%s' % (ch, cell))
    map1.clear()
    if not map1.load():
        map1.genome_info(path + '/%s_chr_len.txt' % genome)
        map1.create_binnedmap(reso, lazy=True)
        map1.focus_chromosome(ch, st=st, ed=ed)
        # The three input files share this prefix and differ only by suffix.
        prefix = (path + '/' + cell + '/' + bi +
                  '_resolution_intrachromosomal/' + ch + '/MAPQGE30/' +
                  ch + '_' + bi)
        # ``with`` closes every input file even when parsing fails.
        with open(prefix + '.KRnorm') as infile:
            norm = [float(line) for line in infile]
        with open(prefix + '.KRexpected') as infile:
            expect = [float(line) for line in infile]
        expect.append(1.0)
        print('%s %s' % (len(norm), len(expect)))
        p1 = []
        p2 = []
        val = []
        with open(prefix + '.RAWobserved', 'r', 2 << 9) as infile:
            for line in infile:
                P1, P2, Val = line.split()
                pos1 = int(P1)
                pos2 = int(P2)
                # Keep only contacts with both ends inside [st, ed).
                if pos1 < st or (ed is not None and pos1 >= ed):
                    continue
                if pos2 < st or (ed is not None and pos2 >= ed):
                    continue
                p1.append(pos1)
                p2.append(pos2)
                # Bin index of each end; floor division, as before.
                I = pos1 // reso
                J = pos2 // reso
                val.append(float(Val) / (norm[I] * norm[J]))
        map1.inter_loc1 = np.array(p1, dtype='int')
        map1.inter_loc2 = np.array(p2, dtype='int')
        map1.inter_freq = np.array(val, dtype='float')
        chidx = map1.chr2idx[ch]
        map1.inter_chr1 = chidx * np.ones(len(p1), dtype='int')
        map1.inter_chr2 = chidx * np.ones(len(p2), dtype='int')
        map1.create_binnedmap(reso)
        map1.save()
    show(cell)
    show(ch)
    if pdf is not None:
        map1.plot_map()
        pdf.savefig()
        plt.clf()
    map1.decompose_auto()
    map1.sort_groups()
    show(map1.contact_group.shape)
    show()
    bins = map1.choose_map_loc(loci)

    # Per-bin group weights, scaled by the mean row sum of non-empty rows.
    W = np.asarray(map1.contact_group * map1.group_map)
    n, r = W.shape
    wm = W.sum(1)
    W /= np.mean(wm[wm > 0])
    gini = 1 - np.power(W, 2).sum(1)
    gini[wm == 0] = 0

    # Groups that were exported, in export order (reused for the plots).
    jj = []
    # The track file used to be left open; ``with`` makes sure the data is
    # flushed and the handle released.
    with open('loop-%s-in-%s_groups.wig' % (ch, cell), 'w') as outfile:
        for j in W[bins, :].argmax(1):
            outfile.write(
                'track type=wiggle_0 name="C%s' % (j + 1) +
                '" description="BNMF" visibility=full autoScale=off viewLimits=0:200 color=0,0,0 maxHeightPixels=100:50:20 graphType=bar priority=20\nfixedStep chrom='
                + ch + ' start=%d' % st + ' step=%d' % reso + ' span=%d\n' % reso)
            for i in xrange(n):
                outfile.write('%d\n' % int(1000 * W[i, j]))
            jj.append(j)

    sel = range(n)
    lab = ['%dk' % ((i * reso + st) // 1000) for i in sel]
    # About five ticks; the step is clamped to 1 so that maps with fewer
    # than five bins do not produce a zero step.
    five = np.arange(0, len(sel), max(1, len(sel) // 5))
    if pdf is not None:
        map1.plot_map(map1.contact_group * map1.group_map *
                      map1.contact_group.T,
                      log=False)
        pdf.savefig()
        plt.clf()
        # Reconstruction restricted to the exported groups.
        map1.plot_map(map1.contact_group[:, jj] *
                      map1.group_map[jj, :][:, jj] *
                      map1.contact_group[:, jj].T,
                      log=False)
        pdf.savefig()
        plt.clf()
        map1.plot_submap()
        pdf.savefig()
        plt.clf()
        plt.plot(sel, gini[sel], '--k')
        for j in jj:
            plt.plot(sel, W[sel, j], '-', label='C%s' % (j + 1))
        plt.plot(bins, [1.1] * len(bins), 'ro')
        plt.legend()
        plt.xticks([sel[j] for j in five], [lab[j] for j in five])
        pdf.savefig()
        plt.clf()
    return

Exemplo n.º 18

0

Exibir arquivo

Arquivo: compare_cluster.py Projeto: huxihao/BNMF

def get_syn_map(para, bin_size=3200, with_bias=True):
    """Build a synthetic yeast contact map from pre-computed link counts.

    Link counts are loaded from 'syn_link.npy' (optionally multiplied by a
    seeded random per-bin bias and floored), written as an interaction list
    to 'syn_yeast_map_bin<bin_size>[bias].txt', re-binned at ``bin_size`` and
    decomposed with 'NND'. Two heatmaps are saved to
    ``para['ExeFile'] + 'plot1.pdf'``.

    Returns ``(map1, dist)``: the decomposed map and the sub-matrix of
    'syn_dist.npy' selected by the located group bins.
    """
    pdf = PdfPages(para['ExeFile']+'plot1.pdf')
    ## prepare
    map1 = ContactMap('Syn3D')
    map1.genome_info(para['DataPath']+'/Tjong2012GR/yeast_chr_len-Tjong.txt')
    map1.create_binnedmap(3200) ## fixed
    map2 = map1.duplicate()

    ## obtain links from PDB
    link_map = np.load('syn_link.npy')
    if with_bias: ## add random bias
        np.random.seed(0)  # fixed seed keeps the bias reproducible
        bias = np.random.random(link_map.shape[0])
        link_map *= np.outer(bias, bias)
        print('%s %s %s' % (link_map.min(), link_map.max(), link_map.mean()))
        link_map = np.floor(link_map) ## sampling bias
    map1.contact_map = np.matrix(link_map, dtype='float')

    out_name = 'syn_yeast_map_bin%s%s.txt'%(bin_size, 'bias' if with_bias else '')
    ch = map1.frag_chr
    po = (map1.frag_sta+map1.frag_end)/2
    # ``with`` closes (and flushes) the file even if a write fails.
    with open(out_name, 'w') as output:
        for i in xrange(link_map.shape[0]):
            for j in xrange(link_map.shape[1]):
                if link_map[i,j] > 0:
                    output.write('%s\t%s\t%s\t%s\t0\t%s\t1e-10\t1e-10\n'%(ch[i], po[i], ch[j], po[j], link_map[i,j]))
            output.write('\n')

    map1.get_interactions()
    map1.create_binnedmap(bin_size)
    map1.mask_diag()
    map1.plot_map(title='Heatmap for the number of links')
    pdf.savefig()
    plt.clf()
    map1.decompose('NND')
    idx, names = map2.get_locations(map1.output_groups(), st=0, ch=0, po=1, nm=0, add=0)
    dist_map = np.load('syn_dist.npy')
    dist = dist_map[idx,:][:,idx]
    map1.plot_map(dist, title='Heatmap for the average distances', log=False)
    pdf.savefig()
    plt.clf()
    pdf.close()
    return map1, dist

Exemplo n.º 19

0

Exibir arquivo

def plot2(para):
    """Plot the Duan2010N heatmap and its reconstructions for several lambdas.

    Pages written to ``para['ExeFile'] + 'plot2.pdf'``: the full masked map,
    the sub-map of bins 200-399, and the reconstruction of that sub-map for
    ``par_lam`` in 0, 0.1, 1 and 10.
    """
    pdf = PdfPages(para['ExeFile'] + 'plot2.pdf')

    def next_page():
        # Store the current figure as a page and start from a clean canvas.
        pdf.savefig()
        plt.clf()

    ## initalization
    map1 = ContactMap('plot2')
    map1.genome_info('../data/yeast_chr_len.txt')
    map1.add_interactions(
        '../data/Duan2010N/interactions_HindIII_fdr0.01_inter.txt')
    map1.add_interactions(
        '../data/Duan2010N/interactions_HindIII_fdr0.01_intra.txt')
    map1.create_binnedmap(binsize=10e3)
    map1.mask_diag()
    map1.mask_short()
    map1.mask_low()

    map1.plot_map(map1.contact_map, log=True, vmin=1, vmax=100)
    next_page()
    sel = np.arange(200, 400)
    zoomed = map1.contact_map[sel, :][:, sel]
    map1.plot_map(zoomed, log=True, vmin=1, vmax=100)
    next_page()
    for lam in [0, 0.1, 1, 10]:
        map1.reset_solution()
        map1.decompose('NMF-PoissonManifoldEqual', dim_num=55, par_lam=lam)
        recovered = map1.contact_group * map1.group_map * map1.contact_group.T
        map1.plot_map(recovered[sel, :][:, sel],
                      vmin=1,
                      vmax=100,
                      title=str(lam))
        next_page()
    pdf.close()

Exemplo n.º 20

0

Exibir arquivo

def run5(name='demo'):
    """Show how clusters map onto each other as the cluster count changes.

    A 30-dimension decomposition serves as the reference. For every count
    from 5 to 30 a second map is decomposed, its Gini impurity is shown, and
    for each reference cluster the matched cluster from ``best_cor`` is shown
    (an empty string when there is no match).
    """
    show(
        '''
    Mapping clusters by changing the number of total clusters 
    ''', True)
    reference = ContactMap(name)
    probe = ContactMap(name)
    assert reference.load()
    assert probe.load()
    show('Bin Size\tMetric')
    reference.decompose_auto(dim_num=30)
    full = np.arange(reference.contact_group.shape[1])
    show(full.tolist(), True)
    from contact_map import gini_impurity
    for r in xrange(5, 31):
        show(r)
        probe.decompose_auto(dim_num=r)
        show(gini_impurity(np.diag(probe.group_map)))
        # Later pairs overwrite earlier ones with the same first element.
        matched = {}
        for src, dst in reference.best_cor(probe, dims=True):
            matched[src] = dst
        for i in full:
            show(matched[i] if i in matched else '')
        show()

Exemplo n.º 21

0

Exibir arquivo

def one_cell(path, pdf, cell, genome):
    """Aggregate the per-chromosome TAD statistics of one cell type and plot them.

    ``one_chr`` is run on every chromosome of ``genome``; its nine results are
    accumulated and plotted to ``pdf``: TAD sizes, covered clusters per TAD,
    covered TADs per cluster, the average score around TAD boundaries, and
    the Gini impurity and entropy distributions.

    Reads the module-level names ``resolution``, ``plot_left`` and
    ``plot_right``, which are defined outside this function.
    """
    def next_page():
        # Store the current figure as a page and start from a clean canvas.
        pdf.savefig()
        plt.clf()

    map1 = ContactMap()
    map1.genome_info(path+'/%s_chr_len.txt'%genome)

    cci = ccj = 0
    ni = nj = 0
    tadlen, tadtype, grptype = [], [], []
    gini, entropy = [], []
    for key in sorted(map1.idx2chr.keys()):
        info = one_chr(path=path, cell=cell, genome=genome,
                       ch=map1.idx2chr[key], pdf=pdf)
        (CCI, CCJ, NI, NJ, TADLEN, TADTYPE, GRPTYPE, GINI, ENTRO) = info
        cci += CCI
        ccj += CCJ
        ni += NI
        nj += NJ
        tadlen += TADLEN
        tadtype += TADTYPE
        grptype += GRPTYPE
        # Only strictly positive scores enter the distributions.
        gini += GINI[GINI>0].tolist()
        entropy += ENTRO[ENTRO>0].tolist()

    tadlen = np.array(tadlen)
    plt.hist(tadlen*resolution, np.arange(tadlen.max())*resolution)
    plt.title('Distribution of TAD sizes in %s'%cell)
    next_page()

    # The same count table and histogram, once per category list.
    categories = ((tadtype, 'Distribution of covered clusters in %s'),
                  (grptype, 'Distribution of covered TADs in %s'))
    for values, title in categories:
        bins = range(max(values)+1)
        count = histogram(values, bins, False)
        show(bins, True)
        show(count, True)
        as_array = np.array(values)
        plt.hist(as_array, np.arange(as_array.max()+1))
        plt.title(title%cell)
        next_page()

    offsets = np.arange(plot_left, plot_right)
    plt.plot(offsets, (cci+ccj)/(ni+nj), '.-k', label='TAD boundary')
    plt.xlabel('Genomic distances (kb)')
    plt.ylabel('Gini impurity score')
    plt.xticks(offsets, offsets*resolution)
    plt.xlim([plot_left, plot_right])
    plt.title('Average scores around TAD in %s'%cell)
    plt.legend()
    next_page()

    show(mean_std(gini))
    plt.hist(gini, np.arange(0,1.001,0.05))
    plt.title('Distribution of Gini impurity scores')
    plt.xlabel('Gini impurity scores')
    plt.ylabel('Frequency')
    next_page()
    show(mean_std(entropy))

    plt.hist(entropy, np.arange(0,6,0.2))
    plt.title('Distribution of entropy at TAD boundaries')
    plt.xlabel('Entropy')
    plt.ylabel('Frequency')
    next_page()

Exemplo n.º 22

0

Exibir arquivo

def one_chr(path, cell, genome, ch, pdf=None):
    map1 = ContactMap('tad-%s-in-%s'%(ch,cell))
#    map1.clear()
    if not map1.load():
        map1.genome_info(path+'/%s_chr_len.txt'%genome)
        map1.focus_chromosome(ch)
        map1.create_binnedmap(40e3, lazy=True)
        map1.contact_map = np.loadtxt(path+'/'+cell+'/uij.'+ch)
        print cell, ch, map1.frag_sta.shape[0], map1.contact_map.shape[0]
        assert map1.frag_sta.shape[0] == map1.contact_map.shape[0]
        map1.get_sparse_interactions()
        map1.create_binnedmap(resolution*1000)
        map1.mask_diag()
        map1.mask_short()
        map1.mask_low()
        map1.save()
    show(cell)
    show(ch)
    if pdf is not None:
        map1.plot_map()
        pdf.savefig(); plt.clf()
    map1.decompose_auto(update=False)
    map1.sort_groups()
    show(map1.contact_group.shape)
    if pdf is not None:
        map1.plot_submap()
        pdf.savefig(); plt.clf()
    TAD_st, _ = map1.get_locations(path+'/'+cell+'/'+domain, st=0, ch=0, po=1, add=0)
    TAD_ed, _ = map1.get_locations(path+'/'+cell+'/'+domain, st=0, ch=0, po=2, add=-1)
    TAD = zip(TAD_st, TAD_ed)

    W = np.asarray(map1.contact_group * map1.group_map)
    wm = W.sum(1)
    W /= np.mean(wm[wm>0])
    group = np.argmax(W,1)+1
    group[wm==0] = -1 ## masked regions

    gini = 1-np.power(W,2).sum(1)
    gini[wm==0] = -1 ## masked regions
    log2W = np.log2(W)
    log2W[W==0] = 0
    entropy = (-W*log2W).sum(1)
    entropy[wm==0] = 0

    score = gini
    score[score<0] = 0 ## for ploting

    for i in [1,np.argmax(entropy)/50]:
        sel = np.arange(i*50, min(W.shape[0],(i+1)*50))
        pos = ['%.fM'%(j*resolution*1e-3) for j in sel]
        if pdf is not None:
            fig = plt.figure()
            axis = fig.add_subplot(211)
#            axis.plot(sel, score[sel], '--k')
        for i in xrange(W.shape[1]):
#            if W[sel,i].max() > 0.1:
                if pdf is not None:
                    axis.plot(sel, W[sel,i], label='C%s'%i)
        for i,j in TAD:
            if i in sel and j in sel:
                if pdf is not None:
                    axis.plot([i,j], [1,1], 'k-', linewidth=2)
        if pdf is not None:
            plt.ylim([0,1.2])
            plt.xticks(sel[::int(len(sel)/5)], pos[::int(len(sel)/5)])
            axis = fig.add_subplot(212)
            from matplotlib.colors import LogNorm
            axis.imshow(map1.contact_map[sel,:][:,sel], interpolation='none', norm=LogNorm(), aspect='equal', cmap='OrRd')
            axis.legend()
            fig.savefig(pdf, format='pdf')
            plt.clf()
    
    tad = np.zeros_like(gini)
    tadlen = []
    for i,j in TAD:
        for k in xrange(i+1, j-1):
            tad[k] = (i+j+1)/2 ## regions in the domain
        tadlen.append(j-i)

    tadtype = []
    for i in np.unique(tad):
        if i > 0:
            tadtype.append(len(np.unique(group[tad==i])))
    grptype = []
    for i in np.unique(group):
        if i > 0:
            grptype.append(len(np.unique(tad[group==i])))

    show(np.sum(np.logical_and(tad==0,gini>=0))) ## TADs
    for cut in [0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]:
        show(np.sum(gini>=cut)) ## clusters
        show(np.sum(np.logical_and(tad==0,gini>=cut))) ## TADs % clusters

    cci = 0; ccj = 0 ## scores around the TAD boundaries
    ni = 0; nj = 0
    for i,j in TAD:
        if i+plot_left >=0 and i+plot_right < len(score):
            cci += score[(i+plot_left):(i+plot_right)]
            ni += 1
        if j+plot_left >=0 and j+plot_right < len(score):
            ccj += score[(j+plot_left):(j+plot_right)]
            nj += 1
    show()
    return cci,ccj,ni,nj,tadlen,tadtype,grptype,gini,entropy

Exemplo n.º 23

0

Exibir arquivo

Arquivo: package_curve.py Projeto: huxihao/BNMF

def decompose_dist(pdf, curve, r=None):
    'Decompose the Euc distance matrix on curve'
    from contact_map import ContactMap, EIG, NMF_main
    map1 = ContactMap()
    curve_show(curve)
    pdf.savefig(); plt.clf()
    verts = np.array(curve)
    map1.plot_map(verts, title = "Verteces", log=False)
    pdf.savefig(); plt.clf()
    from scipy.spatial.distance import pdist, squareform
    ds = squareform(pdist(verts, 'euclidean'))

    ## transform
#    V = ds.max() - ds
#    V = ds.max() / (ds + 1)
    V = ds.max() * ((ds+1)**-2)
    map1.plot_map(V, title="Synthetic Heatmap", log=True)
    pdf.savefig(); plt.clf()

    #plt.hist(np.reshape(V,(-1)), bins=100, normed=1, facecolor='blue')
    #plt.title('Distribution of map values')
    #pdf.savefig(); plt.clf()
    plt.loglog([(i+1.0)/V.shape[0] for i in range(V.shape[0])], 
               trace_sum(V), linestyle='-.')
    plt.title('Distribution of interactions along 1D')
    plt.xlabel('Ratio of linked locations to the total length')
    plt.ylabel('Number of observed links')
    pdf.savefig(); plt.clf()

    if r == None:
        r = choose_size(pdf, V, 9)
        show('Best number of dimentions is %s\n'%r)
        r = 4
    if False: ## try PCA
        U = (V-np.mean(V.T,axis=1)).T
        Q, M = EIG(np.cov(U), r)
    else:
        Q, M = EIG(V, r)
    map1.plot_map(Q, title = 'Eig. Decomp. - Q Matrix', log=False)
    pdf.savefig(); plt.clf()
    map1.plot_map(M, title = 'Eig. Decomp. - M Matrix', log=False)
    pdf.savefig(); plt.clf()
    map1.plot_map(Q*M*Q.T, title = 'Eig. Decomp. - Recovered', log=False)
    pdf.savefig(); plt.clf()
    sep_map_show(pdf, verts, Q)

    H, S, obj = NMF_main(V, J='NMF-PoissonManifoldEqual', H=Q, S=M, r=r)
    map1.plot_map(H*S*H.T, title = 'NMF Decomp. - Recovered', log=False)
    pdf.savefig(); plt.clf()
    map1.plot_map(H, title = 'NMF Decomp. - H Matrix', log=False)
    pdf.savefig(); plt.clf()
    map1.plot_map(S, title = 'NMF Decomp. - S Matrix', log=False)
    pdf.savefig(); plt.clf()
    maxp = np.argmax(np.asarray(H),0)
    srt = np.argsort(maxp)
    sep_map_show(pdf, verts, H[:,srt])

    try:
        from sklearn.cluster import KMeans
        km = KMeans(n_clusters=r)
        H = -np.matrix(km.fit_transform(V))
        S = np.matrix(np.eye(r))
        maxp = np.argmax(np.asarray(H),0)
        srt = np.argsort(maxp)
        map1.plot_map(H, title = 'K-means Decomp. - H Matrix', log=False)
        pdf.savefig(); plt.clf()
        sep_map_show(pdf, verts, H[:,srt])
    except:
        print 'Please install SK-kit to run K-means'
        pass

Exemplo n.º 24

0

Exibir arquivo

Arquivo: map_plots.py Projeto: huxihao/BNMF

def plot2(para):
    """Plot the binned yeast map and its NMF reconstructions to a PDF.

    The report is written to ``para['ExeFile'] + 'plot2.pdf'``.  After the
    full map and a 200-bin window of it, one reconstructed window is plotted
    for each tested value of ``par_lam``.
    """
    report = PdfPages(para['ExeFile']+'plot2.pdf')

    def next_page():
        ## store the current figure in the report and start a fresh one
        report.savefig()
        plt.clf()

    ## initialization: build a masked map with 10 kb bins
    cmap = ContactMap('plot2')
    cmap.genome_info('../data/yeast_chr_len.txt')
    for source in ['../data/Duan2010N/interactions_HindIII_fdr0.01_inter.txt',
                   '../data/Duan2010N/interactions_HindIII_fdr0.01_intra.txt']:
        cmap.add_interactions(source)
    cmap.create_binnedmap(binsize=10e3)
    cmap.mask_diag()
    cmap.mask_short()
    cmap.mask_low()

    cmap.plot_map(cmap.contact_map, log=True, vmin=1, vmax=100)
    next_page()

    sel = np.arange(200,400)

    def window(matrix):
        ## square sub-matrix restricted to the selected bins
        return matrix[sel,:][:,sel]

    cmap.plot_map(window(cmap.contact_map), log=True, vmin=1, vmax=100)
    next_page()

    for lam in [0, 0.1, 1, 10]:
        cmap.reset_solution()
        cmap.decompose('NMF-PoissonManifoldEqual', dim_num=55, par_lam=lam)
        recovered = cmap.contact_group * cmap.group_map * cmap.contact_group.T
        cmap.plot_map(window(recovered), vmin=1, vmax=100, title=str(lam))
        next_page()
    report.close()

Exemplo n.º 25

0

Exibir arquivo

Arquivo: map_plots.py Projeto: huxihao/BNMF

def plot1(para):
    """Decompose the yeast contact map and plot every factor to a PDF.

    The report goes to ``para['ExeFile'] + 'plot1.pdf'``.  The map is
    cleared first and rebuilt with 20 kb bins if loading fails; the
    decomposed map is saved before the pages are drawn.
    """
    pdf = PdfPages(para['ExeFile']+'plot1.pdf')

    def flush():
        ## write the current figure as a page and clear it
        pdf.savefig()
        plt.clf()

    ## initialization
    cmap = ContactMap('plot1')
    cmap.clear()
    ## read chromosome sizes
    if not cmap.load():
        cmap.genome_info('../data/yeast_chr_len.txt')
        for source in ['../data/Duan2010N/interactions_HindIII_fdr0.01_inter.txt',
                       '../data/Duan2010N/interactions_HindIII_fdr0.01_intra.txt']:
            cmap.add_interactions(source)
        cmap.create_binnedmap(binsize=20e3)
        cmap.mask_diag()
        cmap.mask_short()
        cmap.mask_low()

    cmap.decompose_auto(plot=pdf)
    cmap.sort_groups()
    cmap.save()

    def recovered():
        ## H * S * H^T, read from the map at call time
        return cmap.contact_group * cmap.group_map * cmap.contact_group.T

    cmap.plot_map(vmin=1, vmax=1000, title='$X$')
    flush()

    ## linear-scale pages; each matrix is built only when its page is drawn
    linear_pages = [
        (lambda: np.diag(cmap.bias_vector), '$B$'),
        (lambda: cmap.contact_group, '$H$'),
        (lambda: cmap.group_map, '$S$'),
        (lambda: cmap.group_map * cmap.contact_group.T, '$W=SH^T$'),
    ]
    for build, caption in linear_pages:
        cmap.plot_map(build(), log=False, title=caption)
        flush()

    cmap.plot_map(recovered(), vmin=1, vmax=1000, title='$R=HSH^T$')
    flush()

    cmap.label_groups(plot=pdf)

    ## outer products of the first and the last two columns of H
    r = cmap.contact_group.shape[1]
    for col in [0,r-2,r-1]:
        column = cmap.contact_group[:,col]
        cmap.plot_map(column * column.T, vmin=1, title=str(col+1))
        flush()

    cmap.plot_map(np.outer(cmap.bias_vector, cmap.bias_vector), log=False)
    flush()

    ## same reconstruction page again, after add_bias_back()
    cmap.add_bias_back()
    cmap.plot_map(recovered(), vmin=1, vmax=1000, title='$R=HSH^T$')
    flush()
    pdf.close()

Exemplo n.º 26

0

Exibir arquivo

def run3(para, name='demo'):
    show(
        '''
    Compare objective values in NMF and average distances in PDB.
    ''', True)
    pdf = PdfPages(para['ExeFile'] + 'plot.pdf')
    map1 = ContactMap('Syn3D')
    map1.genome_info(para['DataPath'] + '/Tjong2012GR/yeast_chr_len-Tjong.txt')
    map1.create_binnedmap(3200)
    map2 = map1.duplicate()
    map1.contact_map = np.matrix(np.load('syn_link.npy'), dtype='float')
    map1.get_interactions()
    map1.create_binnedmap(32000)
    map1.mask_diag()
    paras = map1.decompose_auto()
    r = paras[-1][0]
    map3 = map1.duplicate()
    show(r)
    show('is the selected cluster number\n')
    print map1.contact_map.shape
    idx, names = map2.get_locations(map1.output_groups(),
                                    st=0,
                                    ch=0,
                                    po=1,
                                    nm=0,
                                    add=0)
    print len(idx)
    dist_map = np.load('syn_dist.npy')
    show(dist_map.mean())
    show('is the average of all bins\n')
    dist = dist_map[idx, :][:, idx]  ## distance among bins
    show(dist.mean())
    show('is the average of selected bins\n')
    inum = []
    objs = []
    avgs = []
    objs3 = []
    avgs3 = []
    show(
        '\tObjective function values\tAverage intra-cluster distances\tcase2\n'
    )
    map1.reset_solution()
    map1.decompose('NND', dim_num=r)
    map3.reset_solution()
    #    map3.decompose('NND', dim_num=r)
    icc = 0
    step = 20
    for i in xrange(100):
        icc += step
        show(icc)
        inum.append(icc)
        obj = map1.decompose(dim_num=r, par_lam=1, max_iter=step, stop_thrd=0)
        obj3 = map3.decompose(dim_num=r, par_lam=1, max_iter=step, stop_thrd=0)
        map1.sort_groups('diagnal')
        show(obj)
        avg = []
        avg3 = []
        for j in xrange(r):
            idx1 = np.asarray(map1.contact_group)[:, j] > float(
                map1.contact_group[:, j].mean())
            D1 = dist[idx1, :][:, idx1]
            d1 = D1[np.triu_indices(D1.shape[0], k=1)]
            avg += d1.tolist()

            idx3 = np.asarray(map3.contact_group)[:, j] > float(
                map3.contact_group[:, j].mean())
            D3 = dist[idx3, :][:, idx3]
            d3 = D3[np.triu_indices(D3.shape[0], k=1)]
            avg3 += d3.tolist()
        show(mean_std(avg)[0])
        show(mean_std(avg3)[0])
        show()
        objs.append(obj)
        objs3.append(obj3)
        avgs.append(mean_std(avg)[0])
        avgs3.append(mean_std(avg3)[0])
    plt.plot(inum, objs, 'r-', label='NNDSVD Initialization')
    #    plt.plot(inum, objs3, 'b--', label='Random Initialization')
    plt.legend()
    plt.xlabel('Number of iterations')
    plt.ylabel('Objective function values for NMF')
    pdf.savefig()
    plt.clf()
    plt.plot(objs, avgs, 'r.', label='NNDSVD Initialization')
    #    plt.plot(objs3, avgs3, 'b+', label='Random Initialization')
    plt.legend()
    plt.xlabel('Objective function values for NMF')
    plt.ylabel('Average intra cluster distances (nm)')
    pdf.savefig()
    plt.clf()
    show('\nCorrelation of objective with the average distances\n')
    show('Pearson Coef.')
    show(correlation(objs, avgs), True)
    show('Spearman Rank Coef.')
    show(correlation(objs, avgs, rank=True), True)
    map1.plot_submap()
    pdf.savefig()
    plt.clf()
    map3.plot_submap()
    pdf.savefig()
    plt.clf()
    pdf.close()

Exemplo n.º 27

0

Exibir arquivo

def plot1(para):
    """Write the decomposition of the yeast contact map to a PDF report.

    Output file: ``para['ExeFile'] + 'plot1.pdf'``.  The map named 'plot1'
    is cleared, rebuilt with 20 kb bins when loading fails, decomposed,
    sorted and saved; then one page is drawn per matrix of interest.
    """
    pdf = PdfPages(para['ExeFile'] + 'plot1.pdf')
    ## initialization
    hic = ContactMap('plot1')
    hic.clear()

    def page(*args, **kwargs):
        ## draw one map, append it to the report, then clear the figure
        hic.plot_map(*args, **kwargs)
        pdf.savefig()
        plt.clf()

    ## read chromosome sizes
    if not hic.load():
        hic.genome_info('../data/yeast_chr_len.txt')
        hic.add_interactions(
            '../data/Duan2010N/interactions_HindIII_fdr0.01_inter.txt')
        hic.add_interactions(
            '../data/Duan2010N/interactions_HindIII_fdr0.01_intra.txt')
        hic.create_binnedmap(binsize=20e3)
        hic.mask_diag()
        hic.mask_short()
        hic.mask_low()

    hic.decompose_auto(plot=pdf)
    hic.sort_groups()
    hic.save()

    page(vmin=1, vmax=1000, title='$X$')
    page(np.diag(hic.bias_vector), log=False, title='$B$')
    page(hic.contact_group, log=False, title='$H$')
    page(hic.group_map, log=False, title='$S$')
    page(hic.group_map * hic.contact_group.T, log=False, title='$W=SH^T$')
    page(hic.contact_group * hic.group_map * hic.contact_group.T,
         vmin=1, vmax=1000, title='$R=HSH^T$')

    hic.label_groups(plot=pdf)

    ## one page per selected column of H: the first and the last two
    width = hic.contact_group.shape[1]
    for k in (0, width - 2, width - 1):
        page(hic.contact_group[:, k] * hic.contact_group[:, k].T,
             vmin=1, title=str(k + 1))

    page(np.outer(hic.bias_vector, hic.bias_vector), log=False)

    ## the reconstruction is drawn once more after add_bias_back()
    hic.add_bias_back()
    page(hic.contact_group * hic.group_map * hic.contact_group.T,
         vmin=1, vmax=1000, title='$R=HSH^T$')
    pdf.close()