Exemplo n.º 1
0
def draw_svm(x_expr, predictions, actual, f=0, names=None,
             thr=.05, scatter_errors=False):
    '''
    Draw a grid of pairwise scatter plots colored by predicted and
    actual values.

    For every ordered pair (i, j) of rows in x_expr (including i == j) a
    frameless axes is added to figure `f` at the figure-fraction rectangle
    [i/nx, j/nx, 1/nx, 1/nx] and x_expr[i] is scattered against x_expr[j].

    Each point is drawn twice: a small marker filled with the color of its
    predicted value, and a larger hollow marker whose edge carries the
    color of its actual value.  Colors are 'blue' for values <= -thr,
    'red' for values >= thr and 'black' otherwise.

    KEYWORDS:

    x_expr: indexable collection of value vectors, one per plotted variable
    predictions: one value per point, colors the inner markers
    actual: one value per point, colors the outer marker edges
    f [0]: figure number handed to plt.figure
    names [None]: optional per-row labels used in the subplot titles;
                  when empty/None the row indices are used and the title
                  is drawn with alpha .6 instead of 1.0
    thr [.05]: half-width of the band around zero that is colored black
    scatter_errors [False]: when True, additionally draw a large black
                  marker (zorder -100) behind every point whose predicted
                  and actual colors differ

    Returns None; the function only draws.
    '''

    def _sign_color(value):
        # Three-way classification of a value around zero.
        if value <= -thr:
            return 'blue'
        if value >= thr:
            return 'red'
        return 'black'

    fig = plt.figure(f)
    nx = len(x_expr)

    # Real lists (not lazy iterators): they are reused for every grid cell.
    pcols = [_sign_color(p) for p in predictions]
    ycols = [_sign_color(a) for a in actual]
    # 'none' hides the error marker wherever prediction and truth agree.
    mcols = ['none' if pc == yc else 'black'
             for pc, yc in zip(pcols, ycols)]

    # Marker areas: small filled dot and a larger ring around it.
    rsml = 20
    rbig = rsml * 2.5

    title_alpha = 1.0 if names else .6

    for i in range(nx):
        for j in range(nx):
            ax_r = [float(i) / nx, float(j) / nx,
                    float(1) / nx, float(1) / nx]
            ax = fig.add_axes(ax_r, frameon=False)

            ax.scatter(x_expr[i],
                       x_expr[j],
                       rsml,
                       color=pcols)
            ax.scatter(x_expr[i],
                       x_expr[j],
                       rbig,
                       edgecolor=ycols,
                       color='none')
            if scatter_errors:
                ax.scatter(x_expr[i],
                           x_expr[j],
                           rbig * 2,
                           color=mcols,
                           zorder=-100,
                           edgecolor='none')
            myplots.hideaxes(ax)

            if names:
                namearr = [names[i], names[j]]
            else:
                namearr = [str(i), str(j)]
            myplots.maketitle(ax, ' vs '.join(namearr), alpha=title_alpha)
Exemplo n.º 2
0
def test():
    '''
    Smoke test for the GAGD network pipeline.

    Builds a GAGD instance with small fixed sizes, wires its network from
    a sampled genome, draws the network in the left subplot of figure 0
    and then trains it on data produced by sd.synth_data.

    Takes no arguments and returns None.
    '''
    # Small fixed problem sizes handed to GAGD and sd.synth_data.
    nhc = 2
    ntg = 2
    ntf_s = 2
    max_tfu = 2

    gagd = GAGD(nhc, ntg, ntf_s, [max_tfu] * ntg)
    xs, ys = sd.synth_data(ntg, max_tfu, ntf_s)

    # sample_genome returns a pair; only the first element is used here.
    g, _unused = gagd.sample_genome()
    gagd.init_net()
    gagd.make_cxns_from_genome(g)

    net = gagd.mynn.net

    f = plt.figure(0)
    f.clear()
    ax = f.add_subplot(121)
    myplots.draw_pb(ax, net)
    myplots.hideaxes(ax)
    myplots.maketitle(ax, 'GANN')

    # The data is passed transposed relative to what synth_data returns.
    gagd.set_data(xs.T, ys.T)
    gagd.set_trainer()
    gagd.train()
Exemplo n.º 3
0
def run(method='identity', index=0, reset=0,
        nxmax=100,
        binary_x=False, binary_y=False,
        expression='time',
        cluster_idx=0,
        lrn='tree',
        showall=False,
        tgonly=False,
        randomize_tfs=False,
        ctfs=5,
        ctgs=5,
        cofs=1,
        do_normalize_cluster=True,
        cluster_tfs=True,
        verbose_expr_labels=False,
        ctype=False):
    '''
sush2.run:

run a selected learning algorithm for a cluster.

KEYWORDS:

method ['identity']: membership method passed to get_membership
index  [0]: accepted but not read by this function
reset  [0]: accepted but not read by this function
nxmax  [100]: accepted but not read by this function
binary_x [False]: forwarded to learn_svm (only used when lrn == 'svm')
binary_y [False]: forwarded to learn_svm (only used when lrn == 'svm')
expression ['time']: accepted but not read by this function
cluster_idx [0]: position of the target gene within the cluster's target
                 list; selects the block of folded samples to test on
lrn ['tree']: learner; one of 'svm', 'knn', 'tree', 'forest', 'nn'
showall [False]: currently overridden to True inside the function
tgonly [False]: train only on the selected target's own samples
randomize_tfs [False]: replace the TF list by randomly drawn TFs
ctfs, ctgs [5, 5]: forwarded to get_candidates
cofs [1]: which candidate returned by get_candidates to use
do_normalize_cluster [True]: use normalize_cluster_expr rather than
                 non_normal_cluster_expr
cluster_tfs [True]: take TFs from the cluster (get_tf_ss) rather than from
                 the selected target's entry in the parsed network
verbose_expr_labels [False]: label the pairwise plots with TF names
ctype [False]: test on the last four samples of the target's block
                 instead of on all but its last four

RETURNS:

whatever draw_prediction returns for the predictions, or None when
lrn == 'nn' (that branch only trains and draws a synthetic network).

RAISES:

ValueError if lrn is not one of the supported learners.
'''

    # Data assembly:
    #
    # 1: Grab a list of genes of interest and
    #    corresponding expression vectors
    #
    trg_kidxs = nu.net_trg_keyidxs()
    tf_kidxs = nu.net_tf_keyidxs()
    #
    # retrieve the list of trg/tf names present in a given cluster.
    # note that at the moment, these are fake functions that just give back
    # a little list of trgs and all of their associated TFs
    #
    # --CLUSTERS USED--
    cands = get_candidates(10, ctfs, ctgs)
    cidx = cands[cofs]
    trg_ssnames = get_trg_ss(cluster=cidx)
    tf_ssnames = get_tf_ss(cluster=cidx, trgnames=trg_ssnames)

    if not cluster_tfs:
        # Use the TFs listed for the selected target in the parsed network
        # instead of the cluster-wide TF list.
        tgs, _tfs = nu.parse_net()
        tg_specific = trg_ssnames[cluster_idx]
        tf_ssnames = tgs[tg_specific]['tfs']

    if randomize_tfs:
        tf_keys = list(tf_kidxs.keys())
        # randint excludes the upper bound, so every draw is a valid index
        # into tf_keys.
        picks = np.random.randint(0, len(tf_keys), len(tf_ssnames))
        print('Randomizing TFs')
        tf_ssnames = [tf_keys[k] for k in picks]

    # Not read again below; kept because the lookups fail here if a name is
    # missing from the index mappings (presumably a KeyError -- TODO confirm
    # the mappings are plain dicts).
    trg_ssidxs = array([trg_kidxs[name] for name in trg_ssnames])
    tf_ssidxs = array([tf_kidxs[name] for name in tf_ssnames])
    #
    # 2: Project expression data onto membership vectors
    #
    # --EXPR CLUSTERING--
    # 4: Grab a list of 'membership vectors' which
    #    translate genes to x and y in the machine learning problem
    #    data merging has not yet been implemented but should be quite simple
    #
    # NOTE(review): the results are not used yet; the calls are retained in
    # case get_membership validates `method` -- confirm before removing.
    x_memberships = get_membership(tf_ssnames, method=method)
    y_memberships = get_membership(trg_ssnames, method=method)

    if do_normalize_cluster:
        tg_expr, tf_expr = normalize_cluster_expr(trg_ssnames, tf_ssnames,
                                                  ctype=ctype)
    else:
        tg_expr, tf_expr = non_normal_cluster_expr(trg_ssnames, tf_ssnames,
                                                   ctype=ctype)
    x_expr = array(tf_expr).T
    y_expr = array(tg_expr).T

    show_clustered_expr(y_expr, x_expr, trg_ssnames, tf_ssnames, fig=8)

    _, npertg = shape(x_expr)
    x_all, y_all = fold_expr(x_expr, y_expr)
    _, nt_folded = shape(x_all)

    # Folded sample indices belonging to the selected target.
    tg_start = cluster_idx * npertg
    tginds = range(tg_start, tg_start + npertg)
    tg_all = set(tginds)
    tg_head = set(tginds[:-4])      # all but the last four of the block
    # Indices that fall in the last four positions of any npertg-sized block.
    cinds = set(i for i in range(nt_folded) if i % npertg >= npertg - 4)

    # NOTE(review): with tgonly=True and ctype=False the train and test
    # index sets are identical (both are tginds[:-4]) -- confirm intended.
    train_idxs, test_idxs = [], []
    for i in range(nt_folded):
        if ctype:
            is_test = i in cinds and i in tg_all
        else:
            is_test = i in tg_head
        if tgonly:
            is_train = i in tg_head
        else:
            is_train = i not in tg_all and i not in cinds
        if is_test:
            test_idxs.append(i)
        if is_train:
            train_idxs.append(i)

    print('N_TRAIN %d' % len(train_idxs))
    expr_fig = 0
    draw_expr(x_expr, y_expr, expr_fig=expr_fig)

    if lrn == 'svm':
        model = learn_svm(x_all, y_all,
                          train_idxs=train_idxs,
                          test_idxs=test_idxs,
                          binary_x=binary_x,
                          binary_y=binary_y)
        predictions = run_svm((x_all.T)[test_idxs].T, y_all[test_idxs], model)

    elif lrn in ('knn', 'tree', 'forest'):
        all_ex = myrf.get_ex(x_all, y_all)
        train_ex = all_ex.getitems([int(x) for x in train_idxs])
        test_ex = all_ex.getitems([int(x) for x in test_idxs])

        model = myrf.OLearn(lrn, train_ex, test_ex=test_ex)
        predictions = model.predictions(test_ex)

    elif lrn == 'nn':
        # Experimental branch: ignores the data assembled above, trains a
        # sf.genfann network on sf.synth_data and draws the network before
        # and after training.  Produces no predictions and returns None.
        nhc = 2
        ntg = 2
        ntf_s = 2
        max_tfu = 2
        gf = sf.genfann(nhc, ntg, ntf_s, [max_tfu] * ntg)
        xs, ys = sf.synth_data(ntg, max_tfu, ntf_s)
        g, _unused = gf.sample_genome()
        gf.init_net()
        gf.make_cxns_from_genome(g)

        net = gf.mynn.net

        f = plt.figure(0)
        f.clear()
        ax = f.add_subplot(121)
        myplots.draw_pb(ax, net)
        myplots.hideaxes(ax)
        myplots.maketitle(ax, 'GANN')

        gf.set_data(xs.T, ys.T)
        gf.set_trainer()
        gf.train()

        ax2 = f.add_subplot(122)
        myplots.draw_pb(ax2, net)
        myplots.hideaxes(ax2)
        myplots.maketitle(ax2, 'GANN')

        return

    else:
        raise ValueError("unknown learner %r; expected one of 'svm', 'knn', "
                         "'tree', 'forest', 'nn'" % (lrn,))

    actual = y_all[test_idxs]

    # NOTE(review): the showall argument is overridden here, so the pairwise
    # plots are always drawn.  Kept as-is to preserve existing behavior.
    showall = True
    if showall:
        names = tf_ssnames if verbose_expr_labels else None
        # Keywords keep predictions/actual aligned with draw_svm's signature.
        draw_svm(x_all[:, test_idxs],
                 predictions=predictions,
                 actual=actual,
                 f=expr_fig,
                 names=names)

    print(predictions)
    print(actual)

    forstring = 'CL Data' if ctype else 'TS Data'
    namestr = trg_ssnames[cluster_idx]

    if randomize_tfs:
        title_prefix = 'Random TF Predictions'
        fnum = 5
    elif cluster_tfs:
        title_prefix = 'Network Cluster TF Predictions'
        fnum = 6
    else:
        title_prefix = 'Network UnClustered TF Predictions'
        fnum = 6
    title = title_prefix + ' ' + forstring + ', ' + namestr

    msecov = draw_prediction(predictions, actual, fig=fnum,
                             title=title,
                             subt=','.join(tf_ssnames))

    print(msecov)
    return msecov