示例#1
0
文件: gagd.py 项目: bh0085/compbio
def test():
    """Smoke-test GAGD on synthetic data.

    Builds a GAGD instance from small hard-coded sizes, wires its network
    from a sampled genome, draws the network in the left subplot of
    figure 0, then trains it on data from ``sd.synth_data``.

    Returns None; the visible effects are the matplotlib figure and
    whatever ``gagd.train()`` does internally.
    """
    # Hard-coded problem sizes; their meaning is defined by GAGD and
    # sd.synth_data (presumably hidden-cluster / target / TF counts -- TODO confirm).
    nhc = 2
    ntg = 2
    ntf_s = 2
    max_tfu = 2

    gagd = GAGD(nhc, ntg, ntf_s, [max_tfu for _ in range(ntg)])
    xs, ys = sd.synth_data(ntg, max_tfu, ntf_s)

    # sample_genome() returns a pair; only the first element is used here.
    genome, _ = gagd.sample_genome()
    gagd.init_net()
    gagd.make_cxns_from_genome(genome)

    net = gagd.mynn.net

    fig = plt.figure(0)
    fig.clear()
    ax = fig.add_subplot(121)
    myplots.draw_pb(ax, net)
    myplots.hideaxes(ax)
    myplots.maketitle(ax, 'GANN')

    # The synthetic data is handed over transposed.
    gagd.set_data(xs.T, ys.T)
    gagd.set_trainer()
    gagd.train()
示例#2
0
文件: sush2.py 项目: bh0085/compbio
def _split_fold_indices(nt_folded, npertg, cluster_idx, ctype, tgonly):
    """Split the folded column indices into training and test index lists.

    Columns ``0 .. nt_folded-1`` are treated as consecutive groups of
    ``npertg`` columns; the group at position ``cluster_idx`` belongs to the
    target being modelled.  The last four columns of every group are set
    apart as "tail" columns.

    * test set:  with ``ctype`` the tail columns of the target group,
      otherwise the target group without its last four columns.
    * train set: with ``tgonly`` the target group without its last four
      columns, otherwise every column that is neither in the target group
      nor a tail column.

    Note that with ``tgonly`` and without ``ctype`` both lists are equal.

    Returns ``(train_idxs, test_idxs)``, each in ascending order.
    """
    n_tail = 4
    first = cluster_idx * npertg
    target_cols = list(range(first, first + npertg))

    # Sets are built once so the membership tests in the loop are O(1).
    target_set = set(target_cols)
    target_head = set(target_cols[:-n_tail])
    tail_set = set(i for i in range(nt_folded)
                   if i % npertg >= npertg - n_tail)

    train_idxs, test_idxs = [], []
    for i in range(nt_folded):
        in_target = i in target_set

        if ctype:
            if in_target and i in tail_set:
                test_idxs.append(i)
        elif i in target_head:
            test_idxs.append(i)

        if tgonly:
            if i in target_head:
                train_idxs.append(i)
        elif not in_target and i not in tail_set:
            train_idxs.append(i)

    return train_idxs, test_idxs


def _train_synthetic_nn():
    """Build, draw and train a ``sf.genfann`` network on synthetic data.

    The network is drawn on figure 0 before training (left subplot) and
    after training (right subplot).  None of the expression data assembled
    by ``run`` is used here.
    """
    nhc = 2
    ntg = 2
    ntf_s = 2
    max_tfu = 2

    gf = sf.genfann(nhc, ntg, ntf_s, [max_tfu for _ in range(ntg)])
    xs, ys = sf.synth_data(ntg, max_tfu, ntf_s)

    # sample_genome() returns a pair; only the first element is used here.
    genome, _ = gf.sample_genome()
    gf.init_net()
    gf.make_cxns_from_genome(genome)

    net = gf.mynn.net

    fig = plt.figure(0)
    fig.clear()
    ax_before = fig.add_subplot(121)
    myplots.draw_pb(ax_before, net)
    myplots.hideaxes(ax_before)
    myplots.maketitle(ax_before, 'GANN')

    gf.set_data(xs.T, ys.T)
    gf.set_trainer()
    gf.train()

    ax_after = fig.add_subplot(122)
    myplots.draw_pb(ax_after, net)
    myplots.hideaxes(ax_after)
    myplots.maketitle(ax_after, 'GANN')


def run(method='identity', index=0, reset=0,
        nxmax=100,
        binary_x=False, binary_y=False,
        expression='time',
        cluster_idx=0,
        lrn='tree',
        showall=False,
        tgonly=False,
        randomize_tfs=False,
        ctfs=5,
        ctgs=5,
        cofs=1,
        do_normalize_cluster=True,
        cluster_tfs=True,
        verbose_expr_labels=False,
        ctype=False):
    """Run a selected learning algorithm for one target of a cluster.

    KEYWORDS:

    method ['identity']: membership method handed to ``get_membership``.
    index, reset, nxmax, expression: accepted for compatibility; not read
        anywhere in this function.
    binary_x, binary_y [False]: forwarded to ``learn_svm`` (``lrn='svm'`` only).
    cluster_idx [0]: position, within the cluster's target list, of the
        target whose columns are modelled and whose name goes in the title.
    lrn ['tree']: learner; one of 'svm', 'knn', 'tree', 'forest', 'nn'.
    showall [False]: currently overridden inside the function (the
        prediction plot is always drawn).
    tgonly [False]: train only on the selected target's own columns.
    randomize_tfs [False]: replace the TF list with uniformly random draws
        (with replacement) from the keys of ``nu.net_tf_keyidxs()``.
    ctfs, ctgs [5, 5]: forwarded to ``get_candidates(10, ctfs, ctgs)``.
    cofs [1]: index into the candidate list selecting the cluster.
    do_normalize_cluster [True]: use ``normalize_cluster_expr`` instead of
        ``non_normal_cluster_expr``.
    cluster_tfs [True]: use the TFs from ``get_tf_ss``; otherwise use the
        'tfs' entry of the selected target in ``nu.parse_net()``.
    verbose_expr_labels [False]: pass the TF names to ``draw_svm``.
    ctype [False]: forwarded to the expression helpers; also switches the
        test set to the last four columns of the target and the title
        label from 'TS Data' to 'CL Data'.

    Returns the value returned by ``draw_prediction``, or None when
    ``lrn == 'nn'`` (that branch only trains a synthetic demo network).

    Raises ValueError for an unknown ``lrn``.
    """
    supported_learners = ('svm', 'knn', 'tree', 'forest', 'nn')
    if lrn not in supported_learners:
        # Previously an unknown learner surfaced much later as a NameError
        # on the undefined `predictions` variable.
        raise ValueError('unknown learner %r; expected one of: %s'
                         % (lrn, ', '.join(supported_learners)))

    # Data assembly:
    #
    # 1: Grab a list of genes of interest and
    #    corresponding expression vectors
    #
    trg_kidxs = nu.net_trg_keyidxs()
    tf_kidxs = nu.net_tf_keyidxs()

    # Retrieve the list of trg/tf names present in the selected cluster.
    cands = get_candidates(10, ctfs, ctgs)
    cidx = cands[cofs]
    trg_ssnames = get_trg_ss(cluster=cidx)
    tf_ssnames = get_tf_ss(cluster=cidx, trgnames=trg_ssnames)

    if not cluster_tfs:
        # Use the network TFs of the single selected target instead.
        tgs, _ = nu.parse_net()
        tf_ssnames = tgs[trg_ssnames[cluster_idx]]['tfs']

    if randomize_tfs:
        tf_keys = list(tf_kidxs.keys())
        # randint's upper bound is exclusive, so every draw is a valid
        # index into tf_keys (random_integers included the upper bound and
        # could index one past the end).
        picks = np.random.randint(0, len(tf_keys), len(tf_ssnames))
        print('Randomizing TFs')
        tf_ssnames = [tf_keys[i] for i in picks]

    # Not used further below; the lookups raise KeyError early when a name
    # is missing from the network key tables.
    trg_ssidxs = array([trg_kidxs[name] for name in trg_ssnames])
    tf_ssidxs = array([tf_kidxs[name] for name in tf_ssnames])

    #
    # 2: Project expression data onto membership vectors
    #
    # The membership vectors are computed but not consumed yet (data
    # merging is not implemented); the calls are kept in case
    # get_membership has side effects -- TODO confirm.
    x_memberships = get_membership(tf_ssnames, method=method)
    y_memberships = get_membership(trg_ssnames, method=method)

    if do_normalize_cluster:
        tg_expr, tf_expr = normalize_cluster_expr(trg_ssnames, tf_ssnames,
                                                  ctype=ctype)
    else:
        tg_expr, tf_expr = non_normal_cluster_expr(trg_ssnames, tf_ssnames,
                                                   ctype=ctype)

    x_expr = array(tf_expr).T
    y_expr = array(tg_expr).T

    show_clustered_expr(y_expr, x_expr, trg_ssnames, tf_ssnames, fig=8)

    # Only the second dimension of each array is needed.
    _, npertg = shape(x_expr)
    x_all, y_all = fold_expr(x_expr, y_expr)
    _, nt_folded = shape(x_all)

    train_idxs, test_idxs = _split_fold_indices(nt_folded, npertg,
                                                cluster_idx, ctype, tgonly)

    print('N_TRAIN %s' % len(train_idxs))
    expr_fig = 0
    draw_expr(x_expr, y_expr, expr_fig=expr_fig)

    if lrn == 'nn':
        # Demo path: trains on synthetic data and produces no predictions.
        _train_synthetic_nn()
        return None

    if lrn == 'svm':
        model = learn_svm(x_all, y_all,
                          train_idxs=train_idxs,
                          test_idxs=test_idxs,
                          binary_x=binary_x,
                          binary_y=binary_y)
        predictions = run_svm(x_all.T[test_idxs].T, y_all[test_idxs], model)
    else:
        # 'knn', 'tree' or 'forest'
        all_ex = myrf.get_ex(x_all, y_all)
        train_ex = all_ex.getitems([int(i) for i in train_idxs])
        test_ex = all_ex.getitems([int(i) for i in test_idxs])
        model = myrf.OLearn(lrn, train_ex, test_ex=test_ex)
        predictions = model.predictions(test_ex)

    actual = y_all[test_idxs]

    # NOTE(review): the `showall` argument is overridden here, so the plot
    # is always drawn.  Kept to preserve existing default behaviour.
    showall = True
    if showall:
        names = tf_ssnames if verbose_expr_labels else None
        draw_svm(x_all[:, test_idxs], actual, predictions,
                 f=expr_fig, names=names)

    print(predictions)
    print(actual)

    forstring = 'CL Data' if ctype else 'TS Data'
    namestr = trg_ssnames[cluster_idx]

    if randomize_tfs:
        title = 'Random TF Predictions ' + forstring + ', ' + namestr
        fnum = 5
    else:
        if cluster_tfs:
            title = 'Network Cluster TF Predictions ' + forstring + ', ' + namestr
        else:
            title = 'Network UnClustered TF Predictions ' + forstring + ', ' + namestr
        fnum = 6

    msecov = draw_prediction(predictions, actual, fig=fnum,
                             title=title,
                             subt=','.join(tf_ssnames))

    print(msecov)
    return msecov