Exemplo n.º 1
0
def get_candidates(n_wanted = 40, ntf = 5, ntg = 10):
    '''Greedily pick cluster indices whose sizes are closest to (ntg, ntf).

    Every cluster reported by test_subs() is scored by the squared
    distance between its two size values and the requested (ntg, ntf);
    presumably these are per-cluster target and TF counts -- TODO confirm
    against test_subs(). Clusters are visited from best to worst score
    and accepted unless their targets have, on average, already been
    selected 0.5 times or more by previously accepted clusters.

    n_wanted: maximum number of cluster indices to return.
    ntf:      desired value for the second list returned by test_subs().
    ntg:      desired value for the first list returned by test_subs().

    Returns the accepted cluster indices, best score first. At most
    n_wanted are returned (earlier versions returned one extra).
    '''
    tgls, tfls = test_subs()

    # Squared distance of every cluster from the requested sizes.
    scores = power(array(tgls) - ntg, 2) + power(array(tfls) - ntf, 2)
    tgkeys = nu.net_trg_keyidxs()

    # found_hash[j] counts the accepted clusters that contain target j.
    found_hash = zeros(len(tgkeys.keys()))
    matches = []
    for i in argsort(scores):
        if len(matches) >= n_wanted: break
        tgs, _ = get_subs(i)
        idxs = [tgkeys[k] for k in tgs]
        m = np.mean(found_hash[idxs])
        if m < .5:
            found_hash[idxs] += 1
            matches.append(i)
        else:
            # A cluster without targets yields m = nan and also lands here.
            print('%s redundant match:  %s' % (m, i))

    return matches
Exemplo n.º 2
0
def get_tf_ss(cluster = 0, trgnames = None, basic = False):
    '''Return the names of the TFs associated with a group of targets.

    cluster:  currently unused.
    trgnames: names of the targets to inspect; required unless basic is
              True.
    basic:    if True, ignore trgnames and return every TF listed under
              the first 50 targets of nu.parse_net(), in first-seen order
              (a placeholder selection).

    When basic is False, a TF is returned if the fraction of trgnames
    having a positive entry for it in nu.net_affinity() is greater than
    the module-level min_tf_perc.

    Raises TypeError if trgnames is missing while basic is False.
    '''
    if basic:
        trgs, _ = nu.parse_net()
        tfl = []
        seen = set()
        # list(...) keeps the slice valid whether keys() is a list or a view.
        for k in list(trgs.keys())[0:50]:
            for t in trgs[k]['tfs']:
                if t not in seen:
                    seen.add(t)
                    tfl.append(t)
        return tfl

    if trgnames is None:
        raise TypeError('trgnames is required when basic is False')

    min_regs = min_tf_perc
    # Boolean matrix: True where the affinity is positive.
    nat = greater(nu.net_affinity(), 0)
    tgkeys = nu.net_trg_keyidxs()
    tg_sub = nat[[tgkeys[k] for k in trgnames],:]
    # Per column, the fraction of the selected rows with a positive entry.
    mem_means = mean(array(tg_sub, float), 0)

    # Invert name -> index into an index -> name table.
    tfkeys = nu.net_tf_keyidxs()
    tfhash = [''] * len(tfkeys.keys())
    for k, v in tfkeys.items(): tfhash[v] = k

    return [tfhash[n] for n in nonzero(greater(mem_means, min_regs))[0]]
Exemplo n.º 3
0
Arquivo: s3.py Projeto: bh0085/compbio
def coreg_keys(t0 = None, do_plot = False):
    '''Return targets from the blanket of t0 that share its regulators.

    Candidates are the targets returned by trg_markov_blanket(t0) whose
    weight exceeds 0.3, excluding t0 itself. For each candidate, its row
    of nu.net_affinity() and the row of t0 are binarised at 0.3 and
    scaled to unit length; the dot product of the two is the sharing
    score. Candidates scoring above 0.4 are returned.

    t0:      name of the reference target.
    do_plot: if True, also call show_m() on the blanket and
             plot_shared() on the scores.

    Returns a list of target names. NOTE(review): this function used to
    compute the list and fall off the end, returning None; the return
    statement is assumed to be the intent -- confirm with callers.
    '''
    btgs, btfs = trg_markov_blanket(t0)
    if do_plot:
        show_m(btgs,btfs,t0)

    min_wt = .3
    # Pair names and weights explicitly so the two stay aligned.
    names = list(btgs.keys())
    weights = array([btgs[n] for n in names])
    # Filtering (rather than list.remove) tolerates t0 being below min_wt.
    keys_thr = [names[i] for i in nonzero(greater(weights, min_wt))[0]
                if names[i] != t0]

    na = nu.net_affinity()
    ktg = nu.net_trg_keyidxs()

    def unit_profile(row):
        # Binarise at min_wt, then scale to unit length. A fresh array is
        # built, so the affinity matrix itself is never modified.
        prof = array(greater(row, min_wt), float)
        l = sqrt(sum(power(prof, 2)))
        if l != 0: prof = prof / l
        return prof

    shared = []
    if keys_thr:
        # The reference profile does not depend on the candidate.
        ref = unit_profile(na[ktg[t0]])
        shared = [sum(ref * unit_profile(na[ktg[k]])) for k in keys_thr]
    shared = array(shared)

    min_sharing = .4
    result = [keys_thr[i] for i in nonzero(greater(shared, min_sharing))[0]]

    if do_plot:
        plot_shared(shared)

    return result
Exemplo n.º 4
0
def get_trg_ss(cluster = 0):
    '''Return the target names flagged for `cluster` in s3.sib_lists().

    The nonzero positions of row `cluster` of s3.sib_lists() are used to
    index the keys of nu.net_trg_keyidxs(). This assumes the key order
    lines up with the columns of sib_lists() -- TODO confirm.

    Raises a bare Exception when no target is selected.
    '''
    # The parse result is unused; the call is retained in case parse_net
    # has side effects such as caching -- TODO confirm and drop.
    _trgs, _tfs = nu.parse_net()

    member_idxs = nonzero(s3.sib_lists()[cluster])[0]
    all_names = array(nu.net_trg_keyidxs().keys())
    members = list(all_names[member_idxs])
    if not members:
        raise Exception()
    return members
Exemplo n.º 5
0
def one(all_means, all_mems,
        tfp, 
        axis = 'tf',
        idxs = [0,1], fig = 5
        ,choice_ax = 'x'
        ,nrml = 'axis'
        ,sorting = 'axis'):
    '''Overlay two clusterings in one image and mark their maxima.

    The clustering selected by idxs[0] is drawn in the blue channel and
    the one selected by idxs[1] in the red channel of figure `fig`. A
    scatter marker is added at the largest entry of every column
    (choice_ax == 'x') or every row (choice_ax == 'y') of each image.

    all_means: sequence indexed by clustering; each entry is 2-D.
    all_mems:  sequence indexed by clustering; each entry is used as a
               per-gene row index (presumably the cluster id of each
               gene -- TODO confirm).
    tfp:       array indexed as tfp[clustering, :, :]; absolute values
               are plotted when axis == 'tf'.
    axis:      'tf' plots the tfp projections, 'gene' plots one-hot
               membership images.
    idxs:      the two clustering indices to compare (read only).
    fig:       matplotlib figure number; the figure is cleared first.
    choice_ax: 'x' or 'y', see above.
    nrml, sorting: NOTE(review): both are overridden inside the body
               ('axis' and 'other' respectively), so the values passed
               by the caller currently have no effect.

    Raises Exception('bad axis') for an unknown axis or choice_ax.
    Returns None.
    '''
    m = all_means[idxs[0]]
    c = all_mems[idxs[0]]
    proj = abs(tfp[idxs[0],:,:])

    m2 = all_means[idxs[1]]
    c2 = all_mems[idxs[1]]
    proj2 = abs(tfp[idxs[1],:,:])

    sqidxs = nu.net_sq_keyidxs()
    trgs, tfs = nu.parse_net()
    gene_sqidxs = [sqidxs[key] for key in trgs.keys()]

    k = len(m)
    ng = len(gene_sqidxs)

    print('''Getting ready to plot clusters mapped on to tf components.

--note--
In its current incarnation, netutils orders tfs by their out degree
and genes by their in degree.

Thus viewmany() orders projects by TF out degree. Left unsorted, this
is the order of the TF x axis.''')

    #how to normalize the image?
    #axis: equal maximum for each column of the image.
    #other: equal maximum for each row of the image.
    # NOTE(review): these two assignments discard the caller's arguments.
    nrml = 'axis'
    nrml_type = np.max
    sorting = 'other'

    print(axis)

    d0 = shape(m)[1]
    d2 = shape(m2)[1]

    show_membership = True

    def sort_by_peak(image, along):
        # Reorder rows (along == 1) or columns (along == 0) by the
        # position of their largest entry.
        order = np.argsort(argmax(image, along))
        if along == 1:
            return image[order,:]
        return image[:,order]

    if axis == 'tf':
        along = 1 if sorting == 'axis' else 0
        img = sort_by_peak(proj, along)
        img2 = sort_by_peak(proj2, along)
    elif axis == 'gene':
        maxgene = 200
        gsort = argsort(c)

        if d0 == 8321 and not show_membership:
            img = m[:,gsort][:,:maxgene]
        else:
            # One-hot membership image: row = cluster, column = gene.
            img = zeros((k,ng))
            for i in range(ng):
                img[c[i],i] = 1
        if d2 == 8321 and not show_membership:
            img2 = m2[:,gsort][:,:maxgene]
        else:
            img2 = zeros((k,ng))
            for i in range(ng):
                img2[c2[i],i] = 1
    else:
        # Previously this fell through to a NameError on img.
        raise Exception('bad axis')

    #normalize to generate an image
    if nrml == 'axis':
        img2 = img2/nrml_type(img2,0)[newaxis,:]
        img = img/nrml_type(img,0)[newaxis,:]
    else:
        img2 = img2/nrml_type(img2,1)[:,newaxis]
        img = img/nrml_type(img,1)[:,newaxis]

    # Scale each image by its own maximum (img2 used to be divided by the
    # maximum of img, which is 1 after the line above).
    img /= np.max(img)
    img2 /= np.max(img2)

    img_show = img[:,:,newaxis] *[0,0,1] + img2[:,:,newaxis]*[1,0,0]

    f = plt.figure(fig)
    f.clear()

    ax = f.add_axes([.05,.05,.9,.9])
    ax.imshow(img_show[:,:,:], aspect = 'auto')

    xs, ys, rs, cs = [[] for i in range(4)]

    nchoice = 1
    if choice_ax == 'y':
        dim = shape(img)[0]
        # Reverse along axis 1 so the largest entries of each row come
        # first; reversing axis 0 would only flip the row order.
        maxes = [argsort(img,1)[:,::-1][:,:nchoice],
                 argsort(img2,1)[:,::-1][:,:nchoice]]
    elif choice_ax == 'x':
        dim = shape(img)[1]
        maxes = [argsort(img,0)[::-1][:nchoice,:],
                 argsort(img2,0)[::-1][:nchoice,:] ]
    else:
        raise Exception('bad axis')

    ct = mycolors.getct(len(maxes))
    for j in range(len(maxes)):
        for i in range(dim):
            for r in range(nchoice):
                if choice_ax == 'x':
                    ys.append(maxes[j][r][i])
                    xs.append(i)
                else:
                    xs.append(maxes[j][i][r])
                    ys.append(i)

                # First image gets the larger marker so both stay visible.
                rs.append(20 + 30*(1-j))
                cs.append(ct[j])

    xs, ys, rs, cs  = np.array(xs),np.array(ys),np.array(rs),np.array(cs)

    # White backdrop first, then the coloured markers on top.
    ax.scatter(xs,ys,200,'1',edgecolor = 'none')
    ax.scatter(xs,ys,rs,cs,alpha = .8, edgecolor = 'none')
Exemplo n.º 6
0
def run(  method ='identity',index = 0, reset = 0, 
          nxmax = 100 , 
          binary_x = False, binary_y = False, 
          expression = 'time' ,
          cluster_idx = 0,
          lrn = 'tree',
          showall = False,
          tgonly = False,
          randomize_tfs = False,
          ctfs = 5,
          ctgs = 5,
          cofs = 1,
          do_normalize_cluster = True,
          cluster_tfs = True,
          verbose_expr_labels = False,
          ctype = False):
    '''
sush2.run:

Run a selected learning algorithm for one cluster and plot the result.

KEYWORDS:

method ['identity']: membership method handed to get_membership
index, reset, nxmax, expression: accepted but not used by this function
binary_x, binary_y: forwarded to learn_svm (lrn == 'svm' only)
cluster_idx [0]: position, within the cluster, of the target that is
    held out for testing and named in the plot title
lrn ['tree']: learner; one of 'svm', 'knn', 'tree', 'forest', 'nn'
showall: NOTE(review): overridden to True in the body, so the value
    passed by the caller has no effect
tgonly: train only on the chosen target's own samples (all but its
    last four) instead of on the other targets
randomize_tfs: replace the cluster's TFs by the same number of TFs
    drawn at random (with replacement) from the whole network
ctfs, ctgs: desired sizes handed to get_candidates as ntf and ntg
cofs [1]: which of the returned candidate clusters to use
do_normalize_cluster: use normalize_cluster_expr rather than
    non_normal_cluster_expr
cluster_tfs: take the TFs from get_tf_ss for the whole cluster rather
    than the TFs listed for the chosen target alone
verbose_expr_labels: pass the TF names to draw_svm
ctype: forwarded to the expression helpers; when true, the test set is
    the last four samples of the chosen target

Returns whatever draw_prediction returns, or None for lrn == 'nn'
(which only trains and draws a demonstration network).
Raises ValueError for an unknown lrn.
'''

    #Data assembly:
    #
    #1: Grab a list of genes of interest and
    #   corresponding expression vectors
    #
    trg_kidxs = nu.net_trg_keyidxs()
    tf_kidxs = nu.net_tf_keyidxs()

    #--CLUSTERS USED--
    cands = get_candidates(10,ctfs,ctgs)
    cidx = cands[cofs]
    trg_ssnames = get_trg_ss(cluster = cidx )

    if cluster_tfs:
        tf_ssnames = get_tf_ss(cluster = cidx , trgnames = trg_ssnames)
    else:
        tgs, tfs = nu.parse_net()
        tf_ssnames = tgs[trg_ssnames[cluster_idx]]['tfs']

    if randomize_tfs:
        all_tf_names = list(tf_kidxs.keys())
        # randint excludes the upper bound; random_integers included it
        # and could therefore index one past the end of the name list.
        r = np.random.randint(0, len(all_tf_names), len(tf_ssnames))
        print('Randomizing TFs')
        tf_ssnames = [all_tf_names[i] for i in r]

    # Not used further below; kept because the lookups fail early on a
    # name that is missing from the index tables.
    trg_ssidxs = array([trg_kidxs[name] for name in trg_ssnames])
    tf_ssidxs = array([tf_kidxs[name] for name in tf_ssnames])
    #
    #2: Project expression data onto membership vectors
    #
    #--EXPR CLUSTERING--
    #   Membership vectors translate genes to x and y in the learning
    #   problem. They are computed but not consumed yet.
    #
    x_memberships = get_membership(tf_ssnames, method = method)
    y_memberships = get_membership(trg_ssnames, method = method)

    if do_normalize_cluster:
        all_expr = normalize_cluster_expr(trg_ssnames, tf_ssnames,ctype = ctype)
    else:
        all_expr = non_normal_cluster_expr(trg_ssnames, tf_ssnames,ctype = ctype)

    tg_expr, tf_expr = all_expr
    x_expr = array((tf_expr)).T
    y_expr = array((tg_expr)).T

    show_clustered_expr(y_expr,x_expr, trg_ssnames, tf_ssnames,fig = 8)

    nx, npertg = shape(x_expr)
    x_all, y_all = fold_expr(x_expr, y_expr)
    nx, nt_folded = shape(x_all)

    # Folded samples belonging to the chosen target.
    tginds = range(cluster_idx *npertg,(cluster_idx*npertg)+npertg)
    tg_set = set(tginds)
    # The same samples without the last four of the block.
    tg_head = set(tginds[:-4])
    # The last four samples of every target's block.
    c_set = set(i for i in range(nt_folded)
                if i % npertg >= npertg - 4)

    if ctype:
        test_idxs = [i for i in range(nt_folded)
                     if i in c_set and i in tg_set]
    else:
        test_idxs = [i for i in range(nt_folded) if i in tg_head]

    if tgonly:
        train_idxs = [i for i in range(nt_folded) if i in tg_head]
    else:
        train_idxs = [i for i in range(nt_folded)
                      if i not in tg_set and i not in c_set]

    print('N_TRAIN %s' % len(train_idxs))
    expr_fig = 0
    draw_expr(x_expr, y_expr, expr_fig = expr_fig)

    if lrn =='svm':
        model = learn_svm( x_all, y_all,
                           train_idxs = train_idxs,
                           test_idxs = test_idxs,
                           binary_x = binary_x,
                           binary_y = binary_y)
        predictions = run_svm((x_all.T)[test_idxs].T , y_all[test_idxs], model)
    elif lrn in ['knn','tree','forest']:
        all_ex = myrf.get_ex(x_all,y_all)
        train_ex = all_ex.getitems([int(x) for x in train_idxs])
        test_ex  = all_ex.getitems([int(x) for x in test_idxs])

        model = myrf.OLearn(lrn, train_ex, test_ex = test_ex)
        predictions = model.predictions(test_ex)
    elif lrn == 'nn':
        # Demonstration only: trains a small network on synthetic data,
        # draws it before and after training, and returns nothing.
        nhc = 2
        ntg = 2
        ntf_s = 2
        max_tfu = 2
        gf = sf.genfann(nhc,ntg,ntf_s, [ max_tfu for i in range(ntg) ] )
        xs, ys = sf.synth_data(ntg,max_tfu,ntf_s)
        g, ga = gf.sample_genome()
        gf.init_net()
        gf.make_cxns_from_genome(g)

        net = gf.mynn.net

        f = plt.figure(0)
        f.clear()
        ax = f.add_subplot(121)
        myplots.draw_pb(ax,net)
        myplots.hideaxes(ax)
        myplots.maketitle(ax,'GANN')

        gf.set_data(xs.T,ys.T)
        gf.set_trainer()
        gf.train()

        ax2 = f.add_subplot(122)
        myplots.draw_pb(ax2,net)
        myplots.hideaxes(ax2)
        myplots.maketitle(ax2,'GANN')

        return
    else:
        # Previously surfaced later as a NameError on `predictions`.
        raise ValueError('unknown lrn: %r' % (lrn,))

    actual = y_all[test_idxs]

    # NOTE(review): discards the caller's showall argument.
    showall = True
    if showall:
        if verbose_expr_labels:
            names = tf_ssnames
        else:
            names = None
        draw_svm(x_all[:,test_idxs],actual, predictions, f = expr_fig,names = names)

    print(predictions)
    print(actual)

    if ctype:
        forstring = 'CL Data'
    else:
        forstring = 'TS Data'

    namestr = trg_ssnames[cluster_idx]

    if randomize_tfs:
        title = 'Random TF Predictions ' + forstring + ', ' +namestr
        fnum = 5
    else:
        # Trailing space added so the data label no longer runs into
        # the word "Predictions".
        if cluster_tfs:
            title = 'Network Cluster TF Predictions ' + forstring + ', ' +namestr
        else:
            title = 'Network UnClustered TF Predictions ' + forstring + ', ' +namestr

        fnum = 6

    msecov = draw_prediction(predictions,actual,fig=fnum, 
                    title = title,
                    subt = ','.join(tf_ssnames))  

    print(msecov)
    return msecov