Пример #1
0
def gdraw0(graphs, plotname = 'default_name', measure = 'cosine'):
    '''Plot how similar every graph in graphs is to the reference graphs['kg'].

    One point is drawn per graph (x: rank of its name in sorted order,
    y: similarity), colored by substrings of the graph name.  The figure is
    saved twice under cfg.dataPath('figs/meAWG/'): first without tick
    labels ('filter_<plotname>_meth_<measure>_nolabels.pdf'), then with the
    graph names as tick labels and a name/color listing ('..._labels.pdf').

    graphs:   dict of name -> graph; must contain the key 'kg'.
    plotname: tag used in both output filenames.
    measure:  similarity measure; only 'cosine' is implemented.

    Raises KeyError if 'kg' is missing and ValueError (an Exception
    subclass) for an unsupported measure.
    '''
    if 'kg' not in graphs:
        raise KeyError('kg')

    # Snapshot the names once so every positional lookup shares one order.
    names = list(graphs.keys())

    def color_of(name):
        # First matching substring rule wins.
        if 'flt' in name and name.count('thr') > 1:
            return 'orange'
        if 'flt' in name:
            return 'red'
        if 'thr' in name:
            return 'yellow'
        if 'fg' in name:
            return 'green'
        if 'su' in name:
            return 'blue'
        return 'black'

    adjs = [array(nx.adj_matrix(graphs[name])) for name in names]
    # Scale every adjacency matrix by sqrt(sum(a**2)) before comparing.
    nrms = [a / sqrt(sum(a**2)) for a in adjs]

    kgelt = names.index('kg')
    if measure == 'cosine':
        sims = array([round(nfu.cosine_adj(a1, nrms[kgelt]), 8) for a1 in nrms])
    else:
        raise ValueError('unsupported similarity measure: {0!r}'.format(measure))

    srto = argsort(names)
    # xvals[i] is the rank of names[i] in sorted order.
    xvals = argsort(srto)
    cols = [color_of(name) for name in names]
    yvals = sims

    f = myplots.fignum(3, (.25 * len(sims), 10))
    f.clear()
    ax = f.add_subplot(111)
    myplots.padded_limits(ax, xvals, yvals + [0.], margin = [.02, .02])
    ax.scatter(xvals, yvals, 100, color = cols)
    ax.set_ylabel('red fly similarity ({0})'.format(measure))
    ax.set_xlabel('networks')
    ax.set_xticklabels([])
    ax.set_xticks([])
    # Guide lines at 0, the median, the mean, the second-largest value and 1.
    mark_ys = [0, median(sims), mean(sims), sort(sims)[::-1][1], 1]
    ax.hlines(mark_ys, *ax.get_xlim(), linestyle = ':', alpha = .2)

    f.savefig(cfg.dataPath('figs/meAWG/filter_{0}_meth_{1}_nolabels.pdf'.\
                               format(plotname, measure)))

    # Second pass: same axes, now with names on the ticks and a color key.
    ax.set_xticks(range(len(srto)))
    ax.annotate('\n'.join(' '.join(z) for z in zip(names, cols)),
                [0, 1], xycoords = 'axes fraction', va = 'top')

    ax.set_xticklabels([names[i] for i in srto],
                       rotation = 45, size = 'xx-small', ha = 'right')

    f.savefig(cfg.dataPath('figs/meAWG/filter_{0}_meth_{1}_labels.pdf'.\
                               format(plotname, measure)))
Пример #2
0
    def setFetchAliNames(**kwargs):
        '''Return the record ids of the first alignment in pvt1/pvt1.fa.

        kwargs are accepted but not used here.
        Raises StopIteration if the file contains no alignment.
        '''
        fa_file = cfg.dataPath('pvt1/pvt1.fa')

        # Pull the first alignment while the handle is still open, then let
        # the with-block close it (the handle used to be leaked).
        with open(fa_file) as fopen:
            a0 = next(aio.parse(fopen, 'fasta'))
        return [a.id for a in a0]
Пример #3
0
def run_batch(geometry):
    '''Configure and submit one avida run per parameter combination.

    For every iteration (400), world dimension (20, 40, 80) and mutation
    rate (.004 down to .0005) a directory
    'big_<geometry>_<mut>_<dim>_<iteration>' is created under
    cfg.dataPath('avida_runs'), make_cfg is run inside it, and the job is
    submitted with bsub; whatever bsub writes to stdout is printed.

    geometry: geometry name, handed to make_cfg as geo.

    Side effect: the working directory is left in the last run directory.
    '''
    import itertools
    import compbio.config as cfg

    muts = [.0005, .001, .002, .003, .004]
    delta_multiplier = .125
    geometries = [geometry]
    dims = [20, 40, 80]
    iters = 400

    root = cfg.dataPath('avida_runs')
    if not os.path.isdir(root):
        os.makedirs(root)
    os.chdir(root)

    command = ('bsub -o bsub_log -q compbio-week '
               '"avida -v0 -c proj0/autogen/avida.cfg.auto"')

    # The parameters are used directly instead of being parsed back out of
    # the directory name, so a geometry containing '_' no longer breaks.
    combos = itertools.product(range(iters), dims, muts[::-1], geometries)
    for it, dim, mut, geo in combos:
        run_dir = os.path.join(
            root, 'big_{0}_{1}_{2}_{3}'.format(geo, mut, dim, it))
        if not os.path.isdir(run_dir):
            os.mkdir(run_dir)
        # make_cfg and the bsub command both use paths relative to the cwd.
        os.chdir(run_dir)

        p = get_params(psets['lots'])
        make_cfg(p,
                 geo='{0}'.format(geo),
                 mut=mut,
                 delta=delta_multiplier * mut,
                 dim=dim)

        prc = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
        # communicate() drains the pipe and waits, so the child is reaped.
        print(prc.communicate()[0])
Пример #4
0
def cr_locii(chrname = 'CHR_I', **kwargs):
    '''Write GenBank extracts around the first 20 genes of one chromosome.

    For each of the first 20 entries of parse_genes()[chrname], the sequence
    from 5000 positions before the gene start to 5000 after its end (clipped
    to the record) is cut out of the chromosome's GenBank record, together
    with every feature lying strictly inside that window (coordinates
    shifted to the window).  Each extract is written to
    cfg.dataPath('modencode/worm_gene_extracts/<chrname>/<gid>.gbk').

    chrname: chromosome directory name under the genome root.
    kwargs:  accepted but not used here.

    Raises IOError if no '.gb' file exists under the chromosome directory.
    '''
    import copy
    import Bio.SeqRecord as sr
    import Bio.Seq as sq

    ng = 20
    flank = 5000

    root = cfg.dataPath('/data/genomes/Caenorhabditis_elegans')
    fdir = os.path.join(root, chrname)
    gb_path = None
    for r, d, files in os.walk(fdir):
        for f in files:
            if '.gb' in f:
                # Same selection as before (first match per directory, later
                # directories override), but only the path is remembered so
                # no handle is opened and abandoned along the way.
                gb_path = os.path.join(r, f)
                break
    if gb_path is None:
        raise IOError('no .gb file found under {0}'.format(fdir))
    with open(gb_path) as fopen:
        gb = list(sio.parse(fopen, 'genbank'))[0]

    out_root = cfg.dataPath('modencode/worm_gene_extracts/{0}'.format(chrname))

    chrinfos = parse_genes()
    for i in range(ng):
        g = chrinfos[chrname][i]
        # Second db_xref entry with its first 9 characters dropped
        # (presumably a database prefix -- confirm against the data).
        gid = g.qualifiers['db_xref'][1][9:]
        print(gid)
        neighbor_range = [max(0, g.location.start.position - flank),
                          min(len(gb.seq) - 1, g.location.end.position + flank)]
        feats_matching = [f for f in gb.features
                          if f.location.start.position > neighbor_range[0]
                          and f.location.end.position < neighbor_range[1]]

        seq = gb.seq[neighbor_range[0]:neighbor_range[1]]

        rec = sr.SeqRecord(sq.Seq(seq.tostring(), seq.alphabet),
                           name = '{0}_g{1}'.format(chrname, i),
                           description = 'genomic neighborhood of {0}'\
                               .format(gid),
                           annotations = {'organism': 'worm'})

        for f in feats_matching:
            # Copy before shifting so the source record stays untouched.
            fnew = copy.deepcopy(f)
            fnew.location.start.position -= neighbor_range[0]
            fnew.location.end.position -= neighbor_range[0]
            rec.features.append(fnew)

        # makedirs: the output path is nested, and mkdir fails when the
        # parent directories are missing.
        if not os.path.isdir(out_root):
            os.makedirs(out_root)
        fname = os.path.join(out_root, '{0}.gbk'.format(gid))
        with open(fname, 'w') as out:
            sio.write(rec, out, 'genbank')
Пример #5
0
    def set_map_rows(**kwargs):
        '''Parse wormbase/loci_all.txt into a list of dicts.

        The first line supplies the comma-separated column names; each later
        line becomes a dict from column name to stripped field value.

        kwargs are accepted but not used here.
        '''
        mapfile = cfg.dataPath("wormbase/loci_all.txt")
        # The with-block closes the handle, which used to be leaked.
        with open(mapfile) as fopen:
            lines = fopen.readlines()
        cols = [e.strip() for e in lines[0].strip().split(",")]
        # NOTE(review): [1:-1] drops the last line as well as the header;
        # kept as-is, but confirm the final line is not a data row.
        rows = [dict(zip(cols, [e.strip() for e in l.strip().split(",")]))
                for l in lines[1:-1]]

        return rows
Пример #6
0
 def set_rows(**kwargs):
     '''Return the cell values of the first sheet of NeuronConnect.xls.

     The workbook is read from cfg.dataPath('wormbrain/2006'); the first
     row is skipped and every other row becomes a list of cell values.
     kwargs are accepted but not used here.
     '''
     workbook_path = os.path.join(cfg.dataPath('wormbrain/2006'),
                                  'NeuronConnect.xls')
     sheet = xlrd.open_workbook(workbook_path).sheets()[0]

     rows = []
     for row_idx in range(1, sheet.nrows):
         rows.append([cell.value for cell in sheet.row(row_idx)])
     return rows
Пример #7
0
def figpath(filename = 'plot.pdf', delete = False):
    '''Return a figure path namespaced by the calling file and function.

    The result is cfg.dataPath('figs/<relpath>/<caller>/<filename>'), where
    <relpath> is cfg.relPath of the caller's source file and <caller> is
    the name of the calling function.

    filename: file name of the figure.
    delete:   if true, remove an existing file at that path first.
    '''
    # Walk the stack once; record 1 is the immediate caller
    # (index 1: source filename, index 3: function name).
    caller = inspect.stack()[1]
    call_path = os.path.abspath(caller[1])
    called_name = caller[3]

    path = cfg.relPath(call_path)
    # Named out_path so the local no longer shadows this function's name.
    out_path = cfg.dataPath(os.path.join('figs', path, called_name, filename))
    if delete and os.path.isfile(out_path):
        os.remove(out_path)
    return out_path
Пример #8
0
    def setFetchNumAli(**kwargs):
        '''Return the first alignment in pvt1/pvt1.fa as a numeric array.

        Each record's upper-cased sequence is mapped through nt_dict, and
        the rows are packed into an array of dtype byte.
        kwargs are accepted but not used here.
        Raises StopIteration if the file contains no alignment.
        '''
        fa_file = cfg.dataPath('pvt1/pvt1.fa')

        # Read the first alignment while the handle is open; the with-block
        # closes it (the handle used to be leaked).
        with open(fa_file) as fopen:
            a_ali = next(aio.parse(fopen, 'fasta'))

        a0 = [[nt_dict[elt] for elt in a.seq.upper()] for a in a_ali]
        return array(a0, byte)
Пример #9
0
def load_pdbfiles(nbs = False):
    '''Parse the three 3UGM structure files and return them keyed by name.

    nbs: selects the 'NBS' files when true, the 'NONBS' files otherwise.

    Returns a dict with keys 'NN', 'NH' and 'NK' holding the structures
    parsed from 'structs/3ugm/3UGM_<key>_<NBS|NONBS>_NOFIX.pdb'.
    '''
    variant = 'NBS' if nbs else 'NONBS'
    structs = {}
    for rvd in ['NN', 'NH', 'NK']:
        fname = cfg.dataPath('structs/3ugm/3UGM_{0}_{1}_NOFIX.pdb'\
                                 .format(rvd, variant))
        pparse = PDB.PDBParser(PERMISSIVE = 0)
        # The with-block closes the handle even if parsing raises.
        with open(fname) as fopen:
            structs[rvd] = pparse.get_structure('tmp', fopen)

    return structs
Пример #10
0
def fetch_cluster_results(keys):
    '''Load the cluster result .mat file for each key into a (key, rows) pair.

    For every k in keys the file
    'soheil/ben_results/cluster_results/results/expression_c4d_n4_t<k>.mat'
    is loaded and each entry of its 'network_cell' field becomes a dict with
    the keys gene, sign, module and score.  Any failure for a key yields
    (k, []) instead.

    NOTE(review): no return statement is visible in this block, so as shown
    the collected list is discarded -- confirm whether the definition
    continues elsewhere.
    '''
    cells = []
    for k in keys:
        try:
            res = sio.loadmat(cfg.dataPath('soheil/ben_results/cluster_results/results/'+\
                                               'expression_c4d_n4_t{0}.mat'.format(k)))
            
            # Fields are taken from positions 0, 1, 2 and 4 of each entry;
            # position 3 is not used.
            cells.append((k, [  dict(gene = int(squeeze(r[0][0])),
                                     sign = r[1][0],
                                     module = tuple(sorted(r[2][0])),
                                     score = r[4][0])
                                for r in res['network_cell']]))
        # Best effort: every error (missing file, bad layout, ...) is
        # swallowed and recorded as an empty result for this key.
        except Exception, e:
            cells.append((k,[]))
Пример #11
0
def load_bonds(nbs = False):
    '''Read the three 3UGM '.bonds' files into lists of bond dicts.

    nbs: selects the 'NBS' files when true, the 'NONBS' files otherwise.

    Returns a dict with keys 'NN', 'NH' and 'NK'.  Each value is a list
    with one dict per line of the file: 'atoms' is the leading
    parenthesised part of the line (up to and including the first '))')
    parsed as JSON after swapping parentheses for brackets and single for
    double quotes; 'dist' is the rest of the line parsed as JSON.

    Raises ValueError for a line that contains no '))'.
    '''
    variant = 'NBS' if nbs else 'NONBS'
    all_bonds = {}
    for rvd in ['NN', 'NH', 'NK']:
        bf = cfg.dataPath('structs/bonds/3UGM/3UGM_{0}_{1}_NOFIX.bonds'\
                              .format(rvd, variant))
        # The with-block closes the handle, which used to be leaked.
        with open(bf) as fopen:
            data = fopen.readlines()

        bonds = []
        for line in data:
            split = line.index('))') + 2
            atoms_json = line[0:split].replace('(','[').replace(')',']').replace("'", '"')
            bonds.append(dict(atoms = json.loads(atoms_json),
                              dist = json.loads(line[split:])))
        all_bonds[rvd.upper()] = bonds

    return all_bonds
Пример #12
0
    def set_genes(**kwargs):
        '''Return the 'gene' features of every chromosome's GenBank record.

        For each name from chromosome_names() the directory
        <genome root>/<name> is searched for a file whose name contains
        '.gb'; the first record of that file is parsed and its features of
        type 'gene' are stored under the chromosome name.

        kwargs are accepted but not used here.
        Raises IOError if a chromosome directory holds no '.gb' file.
        '''
        root = cfg.dataPath('/data/genomes/Caenorhabditis_elegans')
        all_genes = {}
        for name in chromosome_names():
            fdir = os.path.join(root, name)
            gb_path = None
            for r, d, files in os.walk(fdir):
                for f in files:
                    if '.gb' in f:
                        # Same selection as before (first match per
                        # directory, later directories override); only the
                        # path is kept so no handle is opened and dropped.
                        gb_path = os.path.join(r, f)
                        break
            if gb_path is None:
                raise IOError('no .gb file found under {0}'.format(fdir))

            with open(gb_path) as fopen:
                gb = list(sio.parse(fopen, 'genbank'))[0]
            all_genes[name] = [f for f in gb.features if f.type == 'gene']
        return all_genes
Пример #13
0
def mod_list():
    '''Index the rows of the 'denovo' table of denovo.mat in both directions.

    Every row is reduced to tuples (one per element); the first tuple is
    filed under the second in gene_mods and the second under the first in
    tf_mods.

    Returns (tf_mods, gene_mods), each a dict of tuple -> list of tuples.
    '''
    path = cfg.dataPath("soheil/results_new/to ben-july 21/modules/denovo.mat")
    table = sio.loadmat(path)['denovo']

    tf_mods, gene_mods = {}, {}
    for row in table:
        parts = [tuple(list(elt[0])) for elt in row]
        second = parts[1]
        first = parts[0]

        gene_mods.setdefault(second, []).append(first)
        tf_mods.setdefault(first, []).append(second)

    return (tf_mods, gene_mods)
Пример #14
0
 def set_tiling_peaks(**kwargs):
     '''Parse every gzipped peak file in the wormtile directory.

     Files in cfg.dataPath('modencode/wormtile/computed-peaks_gff3') whose
     name ends in 'gz' are read; lines starting with '#' are skipped and
     every other line is split on tabs into a dict with the keys chr, meth,
     type, start, end, score, blank, blank2 and annotations.  start and end
     are converted to int, score to float.

     kwargs are accepted but not used here.
     Returns a dict mapping file name to the list of row dicts.
     '''
     root = cfg.dataPath('modencode/wormtile/computed-peaks_gff3')
     cols = ['chr', 'meth', 'type',
             'start', 'end', 'score',
             'blank', 'blank2', 'annotations']
     out = {}
     for fname in os.listdir(root):
         if fname[-2:] != 'gz':
             continue
         fopen = gzip.open(os.path.join(root, fname))
         # try/finally closes the handle, which used to be leaked.
         try:
             data = [l for l in fopen.readlines() if not l[0] == '#']
         finally:
             fopen.close()

         rows = [dict(zip(cols, l.strip().split('\t'))) for l in data]
         for d in rows:
             d['start'] = int(d['start'])
             d['end'] = int(d['end'])
             d['score'] = float(d['score'])
         out[fname] = rows
     return out
Пример #15
0
    def set_chromosome_offsets(**kwargs):
        '''Return the cumulative start offset of every chromosome.

        Chromosomes are taken in the order given by chromosome_names().  The
        length of each is read as the end position of the first feature of
        its GenBank record; a chromosome's offset is the sum of the lengths
        of all chromosomes before it.

        kwargs are accepted but not used here.
        Raises IOError if a chromosome directory holds no '.gb' file.
        '''
        root = cfg.dataPath('/data/genomes/Caenorhabditis_elegans')
        offsets = {}
        cur_ofs = 0
        for name in chromosome_names():
            fdir = os.path.join(root, name)
            gb_path = None
            for r, d, files in os.walk(fdir):
                for f in files:
                    if '.gb' in f:
                        # Same selection as before (first match per
                        # directory, later directories override); only the
                        # path is kept so no handle is opened and dropped.
                        gb_path = os.path.join(r, f)
                        break
            if gb_path is None:
                raise IOError('no .gb file found under {0}'.format(fdir))

            with open(gb_path) as fopen:
                gb = list(sio.parse(fopen, 'genbank'))[0]

            offsets[name] = cur_ofs
            cur_ofs += gb.features[0].location.end.position
        return offsets
Пример #16
0
    def setTf_Chip_Peaks(**kwargs):
        '''Parse every peak file in the wormchip data directory.

        Each entry of cfg.dataPath('wormchip') is opened as a text file;
        lines starting with '#' are skipped and every other line is split
        on tabs into a dict with the keys chr, meth, type, start, end,
        score, blank, blank2 and qValue.  start and end are converted to
        int, score to float, and qValue to the float after its '='.

        kwargs are accepted but not used here.
        Returns a dict mapping file name to the list of row dicts.
        '''
        root = cfg.dataPath('wormchip')
        cols = ['chr', 'meth', 'type',
                'start', 'end', 'score',
                'blank', 'blank2', 'qValue']
        out = {}
        for fname in os.listdir(root):
            # The with-block closes the handle, which used to be leaked.
            with open(os.path.join(root, fname)) as fopen:
                data = [l for l in fopen.readlines() if not l[0] == '#']

            rows = [dict(zip(cols, l.strip().split('\t'))) for l in data]
            for d in rows:
                d['start'] = int(d['start'])
                d['end'] = int(d['end'])
                d['score'] = float(d['score'])
                d['qValue'] = float(d['qValue'].split('=')[1])
            out[fname] = rows
        return out
Пример #17
0
def run_batch(geometry):
    '''Configure and submit one avida run per parameter combination.

    For every iteration (400), world dimension (20, 40, 80) and mutation
    rate (.004 down to .0005) a directory
    'big_<geometry>_<mut>_<dim>_<iteration>' is created under
    cfg.dataPath('avida_runs'), make_cfg is run inside it, and the job is
    submitted with bsub; whatever bsub writes to stdout is printed.

    geometry: geometry name, handed to make_cfg as geo.

    Side effect: the working directory is left in the last run directory.
    '''
    import itertools
    import compbio.config as cfg

    muts = [.0005,.001,.002,.003,.004]
    delta_multiplier = .125
    geometries = [geometry]
    dims = [20,40,80]
    iters = 400

    root = cfg.dataPath('avida_runs')
    if not os.path.isdir(root):
        os.makedirs(root)
    os.chdir(root)

    command = ('bsub -o bsub_log -q compbio-week '
               '"avida -v0 -c proj0/autogen/avida.cfg.auto"')

    # The parameters are used directly instead of being parsed back out of
    # the directory name, so a geometry containing '_' no longer breaks.
    combos = itertools.product(range(iters), dims, muts[::-1], geometries)
    for it, dim, mut, geo in combos:
        run_dir = os.path.join(
            root, 'big_{0}_{1}_{2}_{3}'.format(geo, mut, dim, it))
        if not os.path.isdir(run_dir):
            os.mkdir(run_dir)
        # make_cfg and the bsub command both use paths relative to the cwd.
        os.chdir(run_dir)

        p = get_params(psets['lots'])
        make_cfg(p,
                 geo = '{0}'.format(geo),
                 mut = mut,
                 delta = delta_multiplier * mut,
                 dim = dim)

        prc = subprocess.Popen(command, shell = True, stdout = subprocess.PIPE)
        # communicate() drains the pipe and waits, so the child is reaped.
        print(prc.communicate()[0])
Пример #18
0
import Bio.SeqIO as SeqIO
import Bio.Seq as seq
import Bio.Alphabet as Alphabet
import Bio.Align as al
import Bio.AlignIO as aio
import Bio.SeqRecord as sr

from numpy import *
import os, re
import cb.config as cfg
# Directory that write_fa() writes its FASTA files into; created as a side
# effect of importing this module when it does not exist yet.
temp_dir = cfg.dataPath('alg')
if not os.path.isdir(temp_dir):
    os.mkdir(temp_dir)
import subprocess as spc

def writelines(lines,gid):
    '''Build a multiple sequence alignment from every 50th entry of lines.

    Each selected entry is joined into one string and wrapped in a record
    with id '<gid><n>i' and description '<gid><n>d', n counting the
    selected entries from 0.
    '''
    records = []
    for n, chars in enumerate(lines[::50]):
        record = sr.SeqRecord(seq.Seq(''.join(chars)),
                              '{1}{0}i'.format(n,gid),
                              description = '{1}{0}d'.format(n,gid))
        records.append(record)
    return al.MultipleSeqAlignment(records)
    
def write_fa(lines, gid):
    '''Write the alignment built from lines to a FASTA file and return its path.

    The file is '<gid>_seqlines.fa' inside temp_dir; its content is the
    alignment produced by writelines(lines, gid).
    '''
    fpath = os.path.join(temp_dir, '{0}_seqlines.fa'.format(gid))
    # The with-block closes the handle even if building or writing the
    # alignment raises.
    with open(fpath,'w') as fopen:
        alignment = writelines(lines,gid)
        SeqIO.write(alignment,fopen,'fasta')
    return fpath
Пример #19
0
import cb.projects.network.io as nio
import numpy as np
from numpy import *
import cb.utils.plots as myplots
import cb.config as cfg
import cb.utils.colors as mycolors
import cb.utils.graphs.utils as gutils
import networkx as nx
import matplotlib.pyplot as plt

from scipy.stats import linregress

# Module-level defaults; ecount, net_types and pctype are the default
# arguments of get_graphs() below.
ecount = 200000
run_name = 'bn_edges'
# run_name is filled in here; the doubled braces leave a literal '{0}'
# placeholder in the resulting path for a later .format() call.
figtemplate = cfg.dataPath('figs/filtering/comparator-{0}-{{0}}.pdf'.\
                               format(run_name))
net_types = 'all'
pctype = 'bn'



def get_graphs(selector = 'just_nets',
               net_choice = net_types,
               pctype = pctype,
               fixed_ecount = ecount,
               restriction = 'none'):

    '''Get a list of graphs.

net_choices:
  'all': fetch all available nets and build graphs according to 
Пример #20
0
def make_cfg(plot_params,
             name = default_name,
             geo = 'square',
             mut = .002,
             delta = .0005,
             dim = 20):
    '''Set up an avida project directory and register its print commands.

    Creates the directory name (relative to the working directory) if
    needed, copies into it every file from
    cfg.dataPath('avida_default_cfgfiles') that it does not already hold,
    writes a header-only 'avida.cfg.auto', creates the 'autogen'
    subdirectory, generates the event/organism/analyze/environment/
    instruction files through ca.make_*, and passes them with the run
    parameters to presets.differential_mut.  The result is stored under
    'computed' in every entry of plot_params.  For entries of type 'ts' or
    'grid' a print command line is added to the events file through
    ca.alter_eve.  Finally the events file is printed.

    plot_params: list of dicts; the keys read are 'type', 'update',
                 'command', 'fname' and, optionally, 'command_params'.
    name:        project directory.
    geo, mut, delta, dim: forwarded to presets.differential_mut.
    '''
    import shutil

    if not os.path.isdir(name):
        os.mkdir(name)

    # Copy the default configuration files that are not present yet.
    flist = os.listdir(name)
    cfg_start = cfg.dataPath('avida_default_cfgfiles')
    for r, d, files in os.walk(cfg_start):
        for f in files:
            if f in flist:
                continue
            shutil.copyfile(os.path.join(r, f),
                            os.path.join(name, f))

    # Written and closed right away; the handle used to stay open, with the
    # header sitting in its buffer, for the rest of the function.
    with open(os.path.join(name, 'avida.cfg.auto'), 'w') as outfile:
        outfile.write('#avida.cfg generated by python.')

    auto_dir = os.path.join(name, 'autogen')
    if not os.path.isdir(auto_dir):
        os.mkdir(auto_dir)
    eve_fname = ca.make_eve(name, default_evename)
    org_fname = ca.make_org(name, default_orgname)
    ana_fname = ca.make_ana(name, default_ananame)
    env_fname = ca.make_env(name, default_envname)
    ins_fname = ca.make_ins(name, default_insname)

    computed = presets.differential_mut(name,
                                        mut = mut,
                                        geo_name = geo,
                                        delta = delta,
                                        dim = dim,
                                        eve_fname = eve_fname,
                                        org_fname = org_fname,
                                        ana_fname = ana_fname,
                                        env_fname = env_fname,
                                        ins_fname = ins_fname,
                                        gen_name = default_genname)
    for pp in plot_params:
        pp['computed'] = computed

    print('')
    print('setting print params: ')
    print('')

    for p in plot_params:
        if p['type'] == 'ts':
            # Every column name is followed by a single space.
            colstr = ''.join(col + ' ' for col in p.get('command_params', []))
            printstr = p['update'] + ' ' + p['command'] + ' ' + p['fname'] + ' ' + colstr
            ca.alter_eve(eve_fname, printstr)
        elif p['type'] == 'grid':
            printstr = p['update'] + ' ' + p['command'] + ' ' + p['fname']
            ca.alter_eve(eve_fname, printstr)

    # Echo the final events file; the with-block closes the handle.
    with open(eve_fname) as eve_file:
        print(eve_file.read())
Пример #21
0
#!/usr/bin/env python
import cb.config as cfg
import os
import cb.utils.bsub_utils as bsu
import cb.utils.bsub as bsub
import subprocess
import pipes 


# Input directory, and the full path of every entry in it whose name
# contains '.mat'.
sdp = cfg.dataPath('soheil')
matfiles = [os.path.join(sdp,f) for f in os.listdir(sdp) if '.mat' in f]

for control in [0,2,3]:
  for f in matfiles: 
      args = \
          f, \
          os.path.join(*( os.path.split(f)[:-1]+('mat_out_{0}'.format(control),)+os.path.split(f)[-1:])),\
          os.path.join(*( os.path.split(f)[:-1]+('mat_all_out_{0}'.format(control),)+os.path.split(f)[-1:]))
  
      for a in args: 
          if not os.path.isdir(os.path.dirname(a)):
              os.makedirs(os.path.dirname(a))
              print 'made directory: {0}'.format(os.path.dirname(a))
  
      
      script = 'test_mine'
      mat_cmd ='''\\"{3}('{0}', '{1}', '{2}', '{4}' ); exit\\"'''.format(\
          *(args + (script,)+(control,)))

      #print mat_cmd
      cstr = '''echo {0} | matlab -nojvm -nodisplay -nosplash '''.\
Пример #22
0
def run(num = 2):
    '''Plot one target gene's expression against its best-correlated factors.

    Walks nio.getBDTNP(), skips the first num - 1 entries and any gene
    missing from the target map of nio.getNet(), and for the first
    remaining gene with at least three candidate factors computes the
    correlation (raw and log) between the gene's expression and each
    factor's.  The three factors with the largest absolute correlation are
    then plotted with the gene's expression, all ordered by the gene's
    expression.

    num: 1-based position in the iteration at which to start looking.
    '''
    # NOTE(review): dfile is only referenced by the commented-out
    # alternatives below; the load result is otherwise unused here.
    dfile = sio.loadmat(cfg.dataPath('soheil/expression_c4d_n4_intercluster.mat'))


    
    trgs, tfs = nio.getNet()
    bdgenes = nio.getBDTNP()
    bdset = set(bdgenes.keys())


    xs, ys, colors, corrs,lcorrs = [[] for i in range(5)]
    count = 0
    for k, v in bdgenes.iteritems():
        count += 1
        # Skip the first num - 1 entries.
        if count < num:
            continue
        if not trgs.has_key(k): continue
        # NOTE(review): trg is not used again in the visible code.
        trg = trgs[k]
        # Candidate factors: every key of tfs that is also a key of bdgenes
        # (not restricted to this gene).
        fsub = set(tfs.keys()).intersection(bdset)

        # Every 50th row, column 4 of the 'vals' array.
        gexpr = bdgenes[k]['vals'][::50,4].flatten() #squeeze(dfile[k]) 
        fexpr = [bdgenes[fname]['vals'][::50,4].flatten() for fname in fsub]#[squeeze(dfile[fname]) for fname in fsub]
        
        
        print shape(fexpr)
        if len(fexpr )< 3: continue
        ct = mycolors.getct(len(fexpr))
        for idx, f in enumerate(fexpr):
            # Non-finite correlations are recorded as 0.
            c = corrcoef(f, gexpr)[0,1]
            if not isfinite(c): c = 0
            lc = corrcoef(log(f), log(gexpr))[0,1]
            if not isfinite(lc): lc = 0
            corrs.append(c)
            lcorrs.append(lc)
            ys.append(gexpr)
            xs.append(f)
            colors.append([ct[idx]]* len(f))
        # Unconditional: the loop ends after the first gene that gets this
        # far, so the size check below is unreachable.
        break
        if len(xs) > 10000:
            break
    
    # Indices into corrs ordered by decreasing absolute correlation.
    cbest = argsort(-1 * abs(array(corrs)))
    

    f = plt.figure(1)
    f.clear()
    ax = f.add_subplot(111)
    # NOTE(review): gexpr is whatever the loop assigned last; it is
    # undefined if no gene reached that assignment.
    inds = argsort(gexpr)

    for idx in cbest[:3]:
        import scipy.signal as ss
        import cb.utils.sigsmooth as sgs
        #k = sgs.gauss_kern(15)[8,:].flatten()
        #xconv = ss.convolve(xs[idx][inds],k) 
        
        # Median filter with kernel size 1 -- presumably leaves the signal
        # unchanged; confirm.
        xv = ss.medfilt(xs[idx][inds],1)
        yv = ys[idx][inds]
        print corrcoef(xv,yv)[0,1]
        print corrs[idx]
        
        ax.plot(ss.medfilt(xs[idx][inds],1), linewidth = 10, color = colors[idx][0])
        ax.plot(ys[idx][inds], linewidth = 10)
Пример #23
0
def gdraw(bgraph,cgraphs, plotname = 'default_name', measure = 'cosine'):
    '''Plot how similar every graph in cgraphs is to the base graph bgraph.

    All adjacency matrices are built over bgraph's node list.  One point is
    drawn per graph (x: rank of its name in sorted order, y: similarity),
    colored by substrings of the graph name.  The figure is saved twice
    under cfg.dataPath('figs/meAWG/'): first without tick labels
    ('filter_<plotname>_meth_<measure>_nolabels.pdf'), then with graph names
    as tick labels and one example name per color ('..._labels.pdf').

    bgraph:   reference graph.
    cgraphs:  dict of name -> graph to compare against bgraph.
    plotname: tag used in both output filenames.
    measure:  one of 'cosine', 'jaccard', 'specificity', 'sensitivity'.

    Raises ValueError (an Exception subclass) for any other measure.
    '''
    nodelist = bgraph.nodes()
    # Snapshot the names once so every positional lookup shares one order.
    names = list(cgraphs.keys())

    def color_of(name):
        # First matching substring rule wins.
        if 'flt' in name and name.count('thr') > 1:
            return 'orange'
        if 'flt' in name:
            return 'red'
        if 'thr' in name:
            return 'yellow'
        if 'fg' in name:
            return 'green'
        if 'su' in name:
            return 'blue'
        return 'black'

    adjs = [array(nx.adj_matrix(cgraphs[name], nodelist = nodelist))
            for name in names]
    badj = array(nx.adj_matrix(bgraph, nodelist = nodelist))

    if measure == 'cosine':
        # Matrices are scaled by sqrt(sum(a**2)) before being compared; the
        # base matrix is now scaled once, and only for this measure.
        bnrm = badj / sqrt(sum(badj**2))
        nrms = [a / sqrt(sum(a**2)) for a in adjs]
        sims = array([round(nfu.cosine_adj(a1,bnrm),8) for a1 in nrms])
    elif measure =='jaccard':
        # NOTE(review): the denominator is sum(a1) + sum(badj) with no
        # overlap subtracted -- confirm this is the intended definition.
        sims = array([round(nfu.dotprod(a1,badj),8)/ (sum(a1) + sum(badj))
                      for a1 in adjs])
    elif measure =='specificity':
        sims = array([round(nfu.dotprod(a1,badj),8)/sum(a1) for a1 in adjs])
    elif measure =='sensitivity':
        sims = array([round(nfu.dotprod(a1,badj),8)/sum(badj) for a1 in adjs])
    else:
        raise ValueError('unsupported similarity measure: {0!r}'.format(measure))

    srto = argsort(names)
    # xvals[i] is the rank of names[i] in sorted order.
    xvals = argsort(srto)
    cols = [color_of(name) for name in names]
    yvals = sims

    f = myplots.fignum(3, (.25 * len(sims),10))
    f.clear()
    ax = f.add_subplot(111)
    myplots.padded_limits(ax,xvals,yvals + [0.], margin = [.02,.02])
    ax.scatter(xvals,yvals,100, color = cols)
    ax.set_ylabel('red fly similarity ({0})'.format(measure))
    ax.set_xlabel('networks')
    ax.set_xticklabels([])
    ax.set_xticks([])

    # Guide lines at 0, the median, the mean, the second-largest value and 1.
    mark_ys = [0, median(sims), mean(sims), sort(sims)[::-1][1],1]
    ax.hlines(mark_ys, *ax.get_xlim(), linestyle = ':',alpha = .2)

    f.savefig(cfg.dataPath('figs/meAWG/filter_{0}_meth_{1}_nolabels.pdf'.\
                               format(plotname,measure)))

    # Second pass: same axes, now with names on the ticks and a color key
    # listing the first graph name seen for each color.
    ax.set_xticks(range(len(srto)))
    cols_added = []
    annotes = []
    for name, col in zip(names, cols):
        if col not in cols_added:
            annotes.append(' '.join((name, col)))
            cols_added.append(col)

    ax.annotate('\n'.join(annotes),
                [1,1],xycoords = 'axes fraction', va = 'top', ha = 'right')

    ax.set_xticklabels([names[i] for i in srto],
                       rotation = 90, va = 'bottom', size = 'xx-small')

    f.savefig(cfg.dataPath('figs/meAWG/filter_{0}_meth_{1}_labels.pdf'.\
                               format(plotname,measure)))
Пример #24
0
import cb.p.network.io as nio
import plots as nfplots
import utils as nfu
import cb.utils.colors as mycolors

import cb.utils.plots as myplots
import matplotlib.pyplot as plt
import networkx as nx
import cb.config as cfg

import itertools as it
from numpy import *
import numpy as np

# Output path template for figures; '{0}' is left unformatted here.
figtemplate = cfg.dataPath('figs/filter/run_{0}.pdf')


def run( reset = False,
         base_net = 'kn',
         comp_net = 'fn',
         demand_bdtnp = False):
    tgs,tfs = nio.getNet()
    ktgs,ktfs = nio.getKNet()
    bd = nio.getBDTNP()
    #btgs,btfs = nio.getBDTNP()
    sush = nio.getSush(on_fail = 'compute')
    


    tfset = set(ktfs.keys())
    tgset = set(ktgs.keys())
Пример #25
0
def fetch_cluster_mapping():
    '''Return (tfnames, tgnames) read from the cluster result .mat file.

    tfnames comes from the 'tf_names' field and tgnames from the
    'gene_names' field; every entry is reduced to its [0][0] element.
    '''
    # NOTE(review): formatting with 't_4' produces '..._tt_4.mat' -- confirm
    # the doubled 't' is the real file name.
    mat_name = 'expression_c4d_n4_t{0}.mat'.format('t_4')
    res = sio.loadmat(cfg.dataPath('soheil/ben_results/cluster_results/all/' + mat_name))

    def names_in(field):
        return [entry[0][0] for entry in res[field]]

    return names_in('tf_names'), names_in('gene_names')
Пример #26
0
import Bio.SeqIO as SeqIO
import Bio.Seq as seq
import Bio.Alphabet as Alphabet
import Bio.Align as al
import Bio.AlignIO as aio
import Bio.SeqRecord as sr

from numpy import *
import os, re
import cb.config as cfg
# Directory that write_fa() writes its FASTA files into; created as a side
# effect of importing this module when it does not exist yet.
temp_dir = cfg.dataPath('alg')
if not os.path.isdir(temp_dir):
    os.mkdir(temp_dir)
import subprocess as spc


def writelines(lines, gid):
    '''Build a multiple sequence alignment from every 50th entry of lines.

    Each selected entry is joined into one string and wrapped in a record
    with id '<gid><n>i' and description '<gid><n>d', n counting the
    selected entries from 0.
    '''
    records = []
    for n, chars in enumerate(lines[::50]):
        record = sr.SeqRecord(seq.Seq(''.join(chars)),
                              '{1}{0}i'.format(n, gid),
                              description='{1}{0}d'.format(n, gid))
        records.append(record)
    return al.MultipleSeqAlignment(records)


def write_fa(lines, gid):
    '''Write the alignment built from lines to '<gid>_seqlines.fa' in temp_dir.

    NOTE(review): in the code visible here the handle is never closed and
    nothing is returned -- the definition may continue past this point;
    confirm.
    '''
    fpath = os.path.join(temp_dir, '{0}_seqlines.fa'.format(gid))
    fopen = open(fpath, 'w')
    alignment = writelines(lines, gid)
    SeqIO.write(alignment, fopen, 'fasta')
Пример #27
0
import cb.config as cfg
import track
import cb.utils.memo as mem
from numpy import *
import os
import numpy as np
# Default input files: mousefile is the default 'fname' of the promoter
# lookup below; peakfile is not referenced in the code visible here.
mousefile = cfg.dataPath('silvana/mouse_genes.bed')
peakfile = cfg.dataPath('silvana/dhs.narrowPeak')


def getTrackChrPromoters(**kwargs):
    '''
Get all of the forward promoter from a bed file
on a given chromosome>

kwargs
num:   chromosome number 
fname: bedfile path

returns
a list of the coordinates of each forward promoter.
'''
    def setTrackChrPromoters(**kwargs):
        fname = kwargs.get('fname', mousefile)
        num = kwargs.get('num', 1)
        t = track.load(fname);
        chromosome_data = t.read('chr{0}'.format(num))
        rows = [dict(zip(r.keys(),r.data)) for r in iter(chromosome_data)]
        fwd_genes = [e for e in rows if e['strand'] == 1]
        fwd_starts =dict([(e['name'],e['start']) for e in fwd_genes])
        fwd_promoters= dict([(k, [v - 2000, v - 100])
Пример #28
0
import networkx as nx
from numpy import *
import numpy as np
import cb.p.network.io as nio
import cb.utils.plots as myplots
import cb.utils.graphs.draw as gd
import cb.config as cfg

import itertools as it
# Output path template for figures; '{0}' is left unformatted here.
figtemplate = cfg.dataPath('figs/soheil/module_signs_{0}.pdf')

def view0(modules,
          data_src = 'bdtnp',
          net_src = 'fRN',
          max_rank = 4,
          module_type = 'doubles'):
    '''
    A routine to view the sign of interaction coefficients for 
    a given transcription factor split per-cluster and per-module
    size.

    Designed to be run on the output of view_output.modules()

'''
    #COMPUTE BULK STATISTICS FOR EACH TF
    bd_data = nio.getBDTNP()
    genes = bd_data.keys()

    tfs =sorted(set(it.chain(*[k for k in modules.keys()])))
    tf_net = nx.Graph()
    tf_net.add_nodes_from(tfs)
Пример #29
0
def make_cfg(plot_params,
             name=default_name,
             geo='square',
             mut=.002,
             delta=.0005,
             dim=20):
    '''Set up an avida project directory and register its print commands.

    Creates the directory name (relative to the working directory) if
    needed, copies into it every file from
    cfg.dataPath('avida_default_cfgfiles') that it does not already hold,
    writes a header-only 'avida.cfg.auto', creates the 'autogen'
    subdirectory, generates the event/organism/analyze/environment/
    instruction files through ca.make_*, and passes them with the run
    parameters to presets.differential_mut.  The result is stored under
    'computed' in every entry of plot_params.  For entries of type 'ts' or
    'grid' a print command line is added to the events file through
    ca.alter_eve.  Finally the events file is printed.

    plot_params: list of dicts; the keys read are 'type', 'update',
                 'command', 'fname' and, optionally, 'command_params'.
    name:        project directory.
    geo, mut, delta, dim: forwarded to presets.differential_mut.
    '''
    import shutil

    if not os.path.isdir(name):
        os.mkdir(name)

    # Copy the default configuration files that are not present yet.
    flist = os.listdir(name)
    cfg_start = cfg.dataPath('avida_default_cfgfiles')
    for r, d, files in os.walk(cfg_start):
        for f in files:
            if f in flist:
                continue
            shutil.copyfile(os.path.join(r, f), os.path.join(name, f))

    # Written and closed right away; the handle used to stay open, with the
    # header sitting in its buffer, for the rest of the function.
    with open(os.path.join(name, 'avida.cfg.auto'), 'w') as outfile:
        outfile.write('#avida.cfg generated by python.')

    auto_dir = os.path.join(name, 'autogen')
    if not os.path.isdir(auto_dir):
        os.mkdir(auto_dir)
    eve_fname = ca.make_eve(name, default_evename)
    org_fname = ca.make_org(name, default_orgname)
    ana_fname = ca.make_ana(name, default_ananame)
    env_fname = ca.make_env(name, default_envname)
    ins_fname = ca.make_ins(name, default_insname)

    computed = presets.differential_mut(name,
                                        mut=mut,
                                        geo_name=geo,
                                        delta=delta,
                                        dim=dim,
                                        eve_fname=eve_fname,
                                        org_fname=org_fname,
                                        ana_fname=ana_fname,
                                        env_fname=env_fname,
                                        ins_fname=ins_fname,
                                        gen_name=default_genname)
    for pp in plot_params:
        pp['computed'] = computed

    print('')
    print('setting print params: ')
    print('')

    for p in plot_params:
        if p['type'] == 'ts':
            # Every column name is followed by a single space.
            colstr = ''.join(col + ' ' for col in p.get('command_params', []))
            printstr = p['update'] + ' ' + p['command'] + ' ' + p[
                'fname'] + ' ' + colstr
            ca.alter_eve(eve_fname, printstr)
        elif p['type'] == 'grid':
            printstr = p['update'] + ' ' + p['command'] + ' ' + p['fname']
            ca.alter_eve(eve_fname, printstr)

    # Echo the final events file; the with-block closes the handle.
    with open(eve_fname) as eve_file:
        print(eve_file.read())
Пример #30
0
import Bio.PDB as PDB
import Bio.PDB.PDBIO as PDBIO
from Bio.PDB.PDBIO import Select

import cb.config as cfg
import os, itertools as it, re


# Data directory for structure files, and the paths of the 3UGM files in it.
droot = cfg.dataPath('structs')
pdb_ann = os.path.join(droot, '3UGM.pdb')
pdb_plain = os.path.join(droot, '3UGM_.pdb')
pqr_file = os.path.join(droot, '3UGM.pqr')



def test(filename = 'g_princeps.pdb'):
    '''Smoke test: parse filename with read_struct and discard the result.'''
    # The with-block closes the handle, which used to be leaked.
    with open(filename) as fopen:
        read_struct(fopen)
def read_struct(fopen):
    '''Parse an open PDB file with a non-permissive parser.

    fopen: file-like object to read from.
    Returns the structure, which is given the id 'tmp'.
    '''
    return PDB.PDBParser(PERMISSIVE = 0).get_structure('tmp', fopen)
def res_pos(filename = 'g_princeps.pdb'):
    '''Placeholder; not implemented.

    Always raises NotImplementedError (an Exception subclass, so callers
    catching Exception are unaffected).
    '''
    raise NotImplementedError('res_pos is not implemented')

def extract_chain(struct,fout):
    '''Save struct to fout, filtered through ChainSelector('A').

    struct: structure to write.
    fout:   output target handed to the writer's save().
    '''
    writer = PDBIO()
    writer.set_structure(struct)
    chain_filter = ChainSelector('A')
    writer.save(fout, select = chain_filter)

class ChainSelector(Select):
Пример #31
0
import networkx as nx
import cb.config as cfg
import os, itertools as it
from numpy import *
import numpy as np
import cb.utils.colors as mycolors

# Atlas data directory and the two files read by structure_voxels().
aba_path = cfg.dataPath('brain_atlas/mouse_aba')
structures_path = os.path.join(aba_path, 'brainstructures.csv')
coords_path = os.path.join(aba_path, 'AtlasAnnotation200.sva')

def structure_voxels():
    structs_open = open(structures_path)
    slines = structs_open.readlines()
    structs_open.close()
    
    coords_open = open(coords_path)
    clines = coords_open.readlines()
    coords_open.close()
    
    crows = [[int(e) for e in  l.strip().split(',')] for l in clines[2:]]

    scols = slines[0].strip().split(',')
    srows = [dict( zip(scols,l.strip().split(','))) for l in slines[1:]]
    
    cgroups =[(k,list(g)) for k, g in  it.groupby(
        sorted(crows, key =lambda x: x[3])
        , key = lambda x: x[3])]
    
    coords = dict([(k, array([[e[0],e[1],e[2]] 
                    for e in v]))
Пример #32
0
 def save(self):
     '''Save the figure self.f to 'figs/tmp/tmp_fig_<self.num>.pdf'.

     The number is substituted after cfg.dataPath has resolved the
     template path.
     '''
     template = cfg.dataPath('figs/tmp/tmp_fig_{0}.pdf')
     self.f.savefig(template.format(self.num))