def test():
    """Smoke-run a GAGD network on synthetic data.

    Builds a GAGD instance with every size argument set to 2, wires its net
    from a sampled genome, draws that net in subplot 121 of figure 0 (titled
    'GANN', axes hidden) and then trains it on the output of
    ``sd.synth_data``.  Nothing is returned.
    """
    # All four size settings share the same value in this smoke test.
    nhc = ntg = ntf_s = max_tfu = 2

    model = GAGD(nhc, ntg, ntf_s, [max_tfu] * ntg)
    inputs, targets = sd.synth_data(ntg, max_tfu, ntf_s)

    # Only the first element of the sampled pair is used to build connections.
    genome, _genome_aux = model.sample_genome()
    model.init_net()
    model.make_cxns_from_genome(genome)
    network = model.mynn.net

    fig = plt.figure(0)
    fig.clear()
    panel = fig.add_subplot(121)
    myplots.draw_pb(panel, network)
    myplots.hideaxes(panel)
    myplots.maketitle(panel, 'GANN')

    # The data arrays are handed over transposed.
    model.set_data(inputs.T, targets.T)
    model.set_trainer()
    model.train()
def _split_train_test(nt_folded, npertg, cluster_idx, ctype, tgonly):
    """Split the folded sample indices into training and test lists.

    The folded axis is treated as consecutive groups of ``npertg`` samples;
    group number ``cluster_idx`` is the modelled target.  The last four
    samples of every group are handled separately from the rest (presumably
    the cell-line measurements, given the 'CL Data' / 'TS Data' plot labels
    used by ``run`` -- TODO confirm).

    Test set:
        ctype true  -> the last four samples of the target group;
        ctype false -> the target group without its last four samples.
    Train set:
        tgonly true  -> the target group without its last four samples;
        tgonly false -> every sample that is outside the target group and
                        not among the last four of its own group.

    Returns ``(train_idxs, test_idxs)``, both in ascending order.
    """
    n_tail = 4
    start = cluster_idx * npertg
    target = range(start, start + npertg)
    # Sets give O(1) membership tests inside the loop below.
    target_all = set(target)
    target_head = set(target[:-n_tail])
    tail_of_group = set(i for i in range(nt_folded)
                        if i % npertg >= npertg - n_tail)

    train_idxs, test_idxs = [], []
    for i in range(nt_folded):
        if ctype:
            is_test = i in tail_of_group and i in target_all
        else:
            is_test = i in target_head
        if is_test:
            test_idxs.append(i)

        if tgonly:
            is_train = i in target_head
        else:
            is_train = i not in target_all and i not in tail_of_group
        if is_train:
            train_idxs.append(i)
    return train_idxs, test_idxs


def _run_nn_demo():
    """Build, draw, train and re-draw the ``sf.genfann`` net on synthetic data.

    The net is drawn in subplot 121 of figure 0 before ``train()`` is called
    and again in subplot 122 afterwards.  Uses only ``sf.synth_data`` output,
    never the expression data assembled by ``run``.
    """
    nhc = 2
    ntg = 2
    ntf_s = 2
    max_tfu = 2
    gf = sf.genfann(nhc, ntg, ntf_s, [max_tfu for _ in range(ntg)])
    xs, ys = sf.synth_data(ntg, max_tfu, ntf_s)

    # Only the first element of the sampled pair is used.
    genome, _genome_aux = gf.sample_genome()
    gf.init_net()
    gf.make_cxns_from_genome(genome)
    net = gf.mynn.net

    fig = plt.figure(0)
    fig.clear()
    ax = fig.add_subplot(121)
    myplots.draw_pb(ax, net)
    myplots.hideaxes(ax)
    myplots.maketitle(ax, 'GANN')

    gf.set_data(xs.T, ys.T)
    gf.set_trainer()
    gf.train()

    ax2 = fig.add_subplot(122)
    myplots.draw_pb(ax2, net)
    myplots.hideaxes(ax2)
    myplots.maketitle(ax2, 'GANN')


def run(method='identity', index=0, reset=0,
        nxmax=100,
        binary_x=False, binary_y=False,
        expression='time',
        cluster_idx=0,
        lrn='tree',
        showall=False,
        tgonly=False,
        randomize_tfs=False,
        ctfs=5,
        ctgs=5,
        cofs=1,
        do_normalize_cluster=True,
        cluster_tfs=True,
        verbose_expr_labels=False,
        ctype=False):
    """sush2.run: run a selected learning algorithm for a cluster.

    KEYWORDS:
    method ['identity']: membership method passed to get_membership
    binary_x [False]: passed to learn_svm (svm learner only)
    binary_y [False]: passed to learn_svm (svm learner only)
    cluster_idx [0]: position, within the cluster's target list, of the
        target gene whose samples form the test set
    lrn ['tree']: learner; one of 'svm', 'knn', 'tree', 'forest', 'nn'
    tgonly [False]: train only on the target gene's own samples
    randomize_tfs [False]: replace the TF list with randomly drawn TF names
    ctfs [5], ctgs [5]: passed to get_candidates
    cofs [1]: which candidate cluster returned by get_candidates to use
    do_normalize_cluster [True]: use normalize_cluster_expr rather than
        non_normal_cluster_expr
    cluster_tfs [True]: use the cluster's TFs; otherwise use the TFs that
        nu.parse_net lists for the selected target
    verbose_expr_labels [False]: pass the TF names to draw_svm as labels
    ctype [False]: test on the last four samples of the target instead of
        the preceding ones; also forwarded to the expression loaders

    index, reset, nxmax, expression: accepted but currently unused.
    showall: accepted but currently ignored -- the prediction plot is
        always drawn.

    RETURNS:
    The value returned by draw_prediction, or None when lrn == 'nn' (that
    branch only runs a synthetic-data network demo).

    RAISES:
    ValueError if lrn is not one of the learners listed above.
    """
    tree_learners = ('knn', 'tree', 'forest')
    if lrn != 'svm' and lrn != 'nn' and lrn not in tree_learners:
        # Fail fast: an unknown learner would otherwise surface much later
        # as a NameError on the never-assigned predictions.
        raise ValueError('unknown learner: %r' % (lrn,))

    #Data assembly:
    #
    #1: Grab a list of genes of interest and
    #   corresponding expression vectors
    #
    trg_kidxs = nu.net_trg_keyidxs()
    tf_kidxs = nu.net_tf_keyidxs()

    #
    #retrieve the list of trg/tf names present in a given cluster.
    #
    #--CLUSTERS USED--
    cands = get_candidates(10, ctfs, ctgs)
    cidx = cands[cofs]
    trg_ssnames = get_trg_ss(cluster=cidx)
    tf_ssnames = get_tf_ss(cluster=cidx, trgnames=trg_ssnames)
    if not cluster_tfs:
        # Use the TFs the network lists for the selected target instead.
        tgs, _tfs = nu.parse_net()
        tf_ssnames = tgs[trg_ssnames[cluster_idx]]['tfs']

    if randomize_tfs:
        all_tf_names = list(tf_kidxs.keys())
        # randint excludes the upper bound, so every draw is a valid index
        # into all_tf_names.
        picks = np.random.randint(0, len(all_tf_names), len(tf_ssnames))
        print('Randomizing TFs')
        tf_ssnames = [all_tf_names[i] for i in picks]

    # NOTE(review): these index arrays are not used below; the lookups are
    # kept because they presumably raise KeyError for names missing from the
    # network indexes -- confirm whether that check is still wanted.
    trg_ssidxs = array([trg_kidxs[name] for name in trg_ssnames])
    tf_ssidxs = array([tf_kidxs[name] for name in tf_ssnames])

    #
    #2: Project expression data onto membership vectors
    #
    #--EXPR CLUSTERING--
    # NOTE(review): the membership vectors are computed but not used yet
    # (data merging has not been implemented).
    x_memberships = get_membership(tf_ssnames, method=method)
    y_memberships = get_membership(trg_ssnames, method=method)

    if do_normalize_cluster:
        tg_expr, tf_expr = normalize_cluster_expr(trg_ssnames, tf_ssnames,
                                                  ctype=ctype)
    else:
        tg_expr, tf_expr = non_normal_cluster_expr(trg_ssnames, tf_ssnames,
                                                   ctype=ctype)
    x_expr = array(tf_expr).T
    y_expr = array(tg_expr).T

    show_clustered_expr(y_expr, x_expr, trg_ssnames, tf_ssnames, fig=8)

    _, npertg = shape(x_expr)
    x_all, y_all = fold_expr(x_expr, y_expr)
    _, nt_folded = shape(x_all)

    train_idxs, test_idxs = _split_train_test(nt_folded, npertg,
                                              cluster_idx, ctype, tgonly)
    print('N_TRAIN %d' % len(train_idxs))

    expr_fig = 0
    draw_expr(x_expr, y_expr, expr_fig=expr_fig)

    if lrn == 'svm':
        model = learn_svm(x_all, y_all,
                          train_idxs=train_idxs,
                          test_idxs=test_idxs,
                          binary_x=binary_x,
                          binary_y=binary_y)
        predictions = run_svm((x_all.T)[test_idxs].T, y_all[test_idxs], model)
    elif lrn in tree_learners:
        all_ex = myrf.get_ex(x_all, y_all)
        train_ex = all_ex.getitems([int(i) for i in train_idxs])
        test_ex = all_ex.getitems([int(i) for i in test_idxs])
        model = myrf.OLearn(lrn, train_ex, test_ex=test_ex)
        predictions = model.predictions(test_ex)
    else:
        # lrn == 'nn': synthetic-data demo only; no predictions are made on
        # the expression data, so there is nothing to score.
        _run_nn_demo()
        return None

    actual = y_all[test_idxs]

    # NOTE: the showall argument is deliberately not consulted here; the
    # comparison plot has always been drawn unconditionally and callers may
    # rely on that.
    names = tf_ssnames if verbose_expr_labels else None
    draw_svm(x_all[:, test_idxs], actual, predictions,
             f=expr_fig, names=names)

    print(predictions)
    print(actual)

    forstring = 'CL Data' if ctype else 'TS Data'
    namestr = trg_ssnames[cluster_idx]
    if randomize_tfs:
        title = 'Random TF Predictions ' + forstring + ', ' + namestr
        fnum = 5
    else:
        if cluster_tfs:
            title = ('Network Cluster TF Predictions ' + forstring
                     + ', ' + namestr)
        else:
            title = ('Network UnClustered TF Predictions ' + forstring
                     + ', ' + namestr)
        fnum = 6

    msecov = draw_prediction(predictions, actual, fig=fnum,
                             title=title,
                             subt=','.join(tf_ssnames))
    print(msecov)
    return msecov