def show_clustered_expr(tge, tfe, tgnames, tfnames, nrml=True, fig=8):
    '''
    Plot target and TF expression series on two separate figures.

    Figure number ``fig`` gets one line per entry of ``tge`` and figure
    ``fig + 1`` one line per entry of ``tfe``. Both figures are cleared
    first and each receives a colour legend built from its name list.

    KEYWORDS:
    tge, tfe:          indexable collections of series (targets / TFs)
    tgnames, tfnames:  legend labels; their lengths set the palette sizes
    nrml [True]:       append '(Normalized)' to both titles. This only
                       changes the title text, the data is plotted as given.
    fig [8]:           figure number of the target plot
    '''
    # Create and reset both figures before anything is drawn on either.
    target_fig = plt.figure(fig)
    tf_fig = plt.figure(fig + 1)
    target_fig.clear()
    tf_fig.clear()
    target_ax = target_fig.add_subplot(111)
    tf_ax = tf_fig.add_subplot(111)

    target_palette = colors.getct(len(tgnames))
    tf_palette = colors.getct(len(tfnames))

    panels = (
        (target_fig, target_ax, tge, target_palette, tgnames,
         'Target Expression Levels'),
        (tf_fig, tf_ax, tfe, tf_palette, tfnames,
         'TF Expression Levels'),
    )
    for figure, axes, series, palette, labels, heading in panels:
        for idx, trace in enumerate(series):
            axes.plot(trace, color=palette[idx])
        myplots.color_legend(figure, palette, labels, ax=axes, pos=4)
        caption = heading
        if nrml:
            caption += '(Normalized)'
        myplots.maketitle(axes, caption)
def show_m(btgs, btfs, name):
    '''
    Plot the sorted values of two Markov-blanket dictionaries on figure 1.

    The upper panel (subplot 211) shows the sorted values of ``btfs`` and
    the lower panel (subplot 212) the sorted values of ``btgs``. ``name``
    is appended to both panel titles.
    '''
    figure = plt.figure(1)
    figure.clear()

    tf_axes = figure.add_subplot(211)
    tf_axes.plot(sorted(btfs.values()))

    tg_axes = figure.add_subplot(212)
    tg_axes.plot(sorted(btgs.values()))

    titled_panels = (
        (tf_axes, 'TFS in the Markov blanket for '),
        (tg_axes, 'TGS in the Markov blanket for '),
    )
    for axes, prefix in titled_panels:
        myplots.maketitle(axes, prefix + name)
def view_in():
    '''
    Draw the sorted in-degree of the network affinity matrix on figure 0.

    The affinity returned by ``nu.net_affinity()`` is summed along its
    first axis, the result is sorted ascending, and the sorted vector is
    handed to ``sm.seismic`` as a single-row input.
    '''
    affinity = nu.net_affinity()

    figure = plt.figure(0)
    figure.clear()
    axes = figure.add_subplot(111)

    in_degree = sum(affinity, 0)
    ascending = argsort(in_degree)
    sorted_degree = in_degree[ascending]

    sm.seismic([sorted_degree], ax=axes)
    myplots.maketitle(axes, 'In degree, sorted')
def _threshold_color(value, thr):
    '''Return 'blue' for value <= -thr, 'red' for value >= thr, else 'black'.'''
    if value <= -thr:
        return 'blue'
    if value >= thr:
        return 'red'
    return 'black'


def draw_svm(x_expr, predictions, actual, f=0, names=None,
             scatter_errors=False):
    '''
    Draw an nx-by-nx grid of pairwise scatter plots coloured by class.

    For every ordered pair (i, j) of rows of ``x_expr`` a frameless axes is
    added to figure ``f`` and row i is scattered against row j. Each point
    is drawn twice: a small filled marker coloured by ``predictions`` and a
    larger hollow ring whose edge is coloured by ``actual``. Values at or
    below -0.05 are blue, at or above 0.05 red, anything else black.

    The figure is not cleared here; axes are added on top of whatever the
    figure already contains.

    KEYWORDS:
    x_expr:                  sequence of rows, one per plotted variable
    predictions, actual:     one score per point
    f [0]:                   figure number
    names [None]:            optional row labels for the panel titles; row
                             indices are used (at lower alpha) when absent
    scatter_errors [False]:  additionally draw a large black marker behind
                             every point whose predicted and actual colours
                             differ
    '''
    fig = plt.figure(f)
    nx = len(x_expr)
    thr = .05

    # Real lists, not map(): the colour sequences are reused for every
    # panel, which a one-shot iterator would not survive.
    pcols = [_threshold_color(p, thr) for p in predictions]
    ycols = [_threshold_color(a, thr) for a in actual]

    rsml = 20
    rbig = rsml * 2.5
    alpha = 1.0 if names else .6

    for i in range(nx):
        for j in range(nx):
            # Each panel occupies a 1/nx by 1/nx cell of the figure.
            ax_r = [float(i) / nx, float(j) / nx, float(1) / nx, float(1) / nx]
            ax = fig.add_axes(ax_r, frameon=False)

            ax.scatter(x_expr[i], x_expr[j], rsml, color=pcols)
            ax.scatter(x_expr[i], x_expr[j], rbig,
                       edgecolor=ycols, color='none')

            if scatter_errors:
                mcols = ['none' if p == y else 'black'
                         for p, y in zip(pcols, ycols)]
                ax.scatter(x_expr[i], x_expr[j], rbig * 2,
                           color=mcols, zorder=-100, edgecolor='none')

            myplots.hideaxes(ax)
            if names:
                namearr = [names[i], names[j]]
            else:
                namearr = [str(i), str(j)]
            myplots.maketitle(ax, ' vs '.join(namearr), alpha=alpha)
def makePlots(self, name="No Name"):
    '''
    Plot test and training predictions against their actual values.

    Test predictions go to figure 1 and training predictions to figure 2.
    Both figures are cleared and each uses the upper panel (subplot 211).
    One call to ``lplots.plotPredictions`` is made per entry of the actual
    values, coloured from a palette sized by the number of training
    outputs. The same palette is reused for the test plot.

    KEYWORDS:
    name ["No Name"]: main title for both plots; the subtitle is
                      "test predictions" or "training predictions"
    '''
    # Only the y halves are plotted; the x halves are not used here.
    _, ytrain = self.xyTrain()
    _, ytest = self.xyTest()
    ytrain_predicted = self.predictTraining()
    ytest_predicted = self.predictTest()

    # Integer position spec: string specs such as "211" are rejected by
    # current Matplotlib releases.
    test_fig = plt.figure(1)
    test_fig.clear()
    test_ax = test_fig.add_subplot(211)

    train_fig = plt.figure(2)
    train_fig.clear()
    train_ax = train_fig.add_subplot(211)

    ct = mycolors.getct(len(ytrain))

    panels = [
        (ytest, ytest_predicted, test_ax, "test predictions"),
        (ytrain, ytrain_predicted, train_ax, "training predictions"),
    ]
    for actual, predicted, ax, subtitle in panels:
        for i in range(len(actual)):
            lplots.plotPredictions(actual[i], predicted[i], ax, color=ct[i])
        myplots.maketitle(ax, name, subtitle=subtitle)
def heatMapGene(gene_name='FBgn0014931', model_class=None, res=5,
                prediction='training'):
    '''
    Show a parameter-sweep error heatmap and best/worst predictions.

    The current figure is cleared, a ``l.Learner`` is built from
    ``gVals(gene_name)`` and ``testParams`` is run over a 2-d parameter
    grid. The upper panel shows the 'test_rms' errors as a heatmap
    annotated with 'pdicts'. The lower panel overlays the prediction with
    the largest error (blue, dotted), the one with the smallest error
    (red, dotted) and the first entry of 'actual_preds'.

    KEYWORDS:
    gene_name:            gene identifier passed to ``gVals``
    model_class [None]:   model class to sweep; ``om.NuSVMModel`` when None
    res [5]:              passed through to ``testParams``
    prediction:           passed through to ``testParams``
    '''
    plt.clf()
    if model_class is None:
        model_class = om.NuSVMModel

    xvals, yvals, coupling = gVals(gene_name)
    learner = l.Learner(xvals, yvals, coupling)
    vals = learner.testParams(model_class, prediction=prediction,
                              res=res, dim=2)
    err = vals['test_rms']
    annotations = vals['pdicts']

    f = plt.gcf()
    # Integer position specs: string specs such as '211' are rejected by
    # current Matplotlib releases.
    ax = f.add_subplot(211)
    ax2 = f.add_subplot(212)

    ax = hm.heatMap(err, annotations, axes=ax)
    myplots.maketitle(ax, 'gene: {0}'.format(gene_name),
                      'heatmap for different learning parameters')

    preds = vals['test_preds']
    # The error grid is indexed by the first two axes of preds.
    grid_shape = shape(preds)[:2]
    best_p = preds[unravel_index(argmin(err), grid_shape)]
    worst_p = preds[unravel_index(argmax(err), grid_shape)]

    ax2.plot(worst_p, linestyle=':', linewidth=4, color='blue')
    ax2.plot(best_p, linestyle=':', linewidth=4, color='red')
    ax2.plot(vals['actual_preds'][0])
def test():
    '''
    Build, draw and train a small GAGD network on synthetic data.

    All four size parameters are 2. A genome is sampled, the network is
    initialised and wired from that genome, and the resulting net is drawn
    in the left half (subplot 121) of figure 0 under the title 'GANN'.
    The synthetic data from ``sd.synth_data`` is then attached (transposed)
    and the trainer is run. Returns None.
    '''
    nhc = ntg = ntf_s = max_tfu = 2

    builder = GAGD(nhc, ntg, ntf_s, [max_tfu] * ntg)
    xs, ys = sd.synth_data(ntg, max_tfu, ntf_s)

    genome, _unused = builder.sample_genome()
    builder.init_net()
    builder.make_cxns_from_genome(genome)
    net = builder.mynn.net

    figure = plt.figure(0)
    figure.clear()
    axes = figure.add_subplot(121)
    myplots.draw_pb(axes, net)
    myplots.hideaxes(axes)
    myplots.maketitle(axes, 'GANN')

    builder.set_data(xs.T, ys.T)
    builder.set_trainer()
    builder.train()
def draw_prediction(predictions, actual, fig=0, match_mean=True,
                    title='', subt=''):
    '''
    Plot rescaled predictions against actual values and score the fit.

    The predictions are centred, rescaled to the standard deviation of
    ``actual`` (skipped when the predictions are constant) and shifted to
    the mean of ``actual``. The rescaled predictions and the actual values
    are plotted on figure ``fig`` together with a band of one standard
    deviation around ``actual``. Regions where the prediction leaves the
    band are filled red (above) or blue (below).

    KEYWORDS:
    predictions, actual: equal-length 1-d sequences; lists are accepted
    fig [0]:             figure number (cleared before drawing)
    match_mean [True]:   currently unused; mean matching is always applied
    title ['']:          main title
    subt ['']:           label passed to ``myplots.label_lr``

    RETURNS:
    [mse_zscore, cov], where mse_zscore is the SUM of squared differences
    between rescaled predictions and actual values (not divided by n,
    despite the 'MSE' label in the subtitle) and cov is their Pearson
    correlation coefficient, or 0 when that is NaN.
    '''
    # Coerce once so plain Python lists work with the arithmetic below.
    predictions = np.asarray(predictions)
    actual = np.asarray(actual)

    f = plt.figure(fig)
    f.clear()
    ax = f.add_subplot(111)
    xax = np.arange(0, len(predictions))

    p2 = predictions - np.mean(predictions)
    spread = np.std(p2)
    if spread != 0:
        p2 = p2 / spread * np.std(actual)
    p2 = p2 + float(np.mean(actual))
    ax.plot(xax, p2)

    mse_zscore = np.sum(np.power(p2 - actual, 2))
    cov = np.corrcoef(p2, actual)[0, 1]
    # corrcoef yields NaN when either series is constant.
    if np.isnan(cov):
        cov = 0

    ax.plot(xax, actual)
    eps = np.std(actual)
    minline = actual - eps
    maxline = actual + eps
    ax.plot(xax, maxline, alpha=.3)
    ax.plot(xax, minline, alpha=.3)
    ax.fill_between(xax, p2, maxline, where=np.greater(p2, maxline),
                    color='red', interpolate=True)
    ax.fill_between(xax, p2, minline, where=np.less(p2, minline),
                    color='blue', interpolate=True)

    myplots.maketitle(
        ax, title,
        subtitle='Validation MSE: ' + str(round(mse_zscore, 3))
        + '\nValidation Correlation: ' + str(round(cov, 3)))
    myplots.label_lr(ax, subt)
    f.show()
    return [mse_zscore, cov]
def check_network(net_name='binding', dataset_name='reinitz', data_ofs=4,
                  max_edges=-1, node_restriction='reinitz'):
    '''
    Histogram the expression correlation across the edges of a network.

    Loads one expression dataset and one network, keeps the edges whose
    endpoints both appear in the dataset, computes the correlation
    coefficient of the two expression vectors for every edge (NaN results
    are dropped), and draws a histogram plus a scaled KDE of those
    coefficients on figure 1. The figure is saved under a name built from
    the arguments via ``myplots.figpath``.

    KEYWORDS:
    net_name ['binding']:          'binding', 'unsup', 'logistic' or
                                   'clusters'
    dataset_name ['reinitz']:      'reinitz', 'bdtnp' or 'tc'
    data_ofs [4]:                  offset passed to the reinitz loader, or
                                   column index into the bdtnp values
    max_edges [-1]:                passed to ``get_soheil_network``
                                   ('clusters' only)
    node_restriction ['reinitz']:  for the 'tc' dataset, 'reinitz' keeps
                                   only genes present in the reinitz data

    RAISES:
    Exception for an unknown dataset_name or net_name.
    '''
    # coords is loaded for 'reinitz' and 'bdtnp' but not used below.
    if dataset_name == 'reinitz':
        coords, values = get_reinitz_data(ofs=data_ofs)
    elif dataset_name == 'bdtnp':
        data = nio.getBDTNP()
        meta = nio.getBDTNP(misc=True)
        values = dict((k, v['vals'][:, data_ofs]) for k, v in data.items())
        coords = array([meta['x']['vals'][:, data_ofs],
                        meta['y']['vals'][:, data_ofs]])
    elif dataset_name == 'tc':
        data = nio.getTC()
        if node_restriction == 'reinitz':
            # Only this branch needs the reinitz gene names, so the extra
            # dataset is loaded here rather than on every call.
            reinitz_keys = set(get_reinitz_data()[1].keys())
            data = dict((k, v) for k, v in data.items()
                        if k in reinitz_keys)
        values = data
    else:
        raise Exception(
            'data set {0} not yet implemented'.format(dataset_name))

    nodes = list(values.keys())
    nodes_allowed = set(nodes)

    nets = comp.get_graphs()
    graph_keys = {'binding': 'bn', 'unsup': 'unsup', 'logistic': 'logistic'}
    if net_name in graph_keys:
        network = nets[graph_keys[net_name]]
    elif net_name == 'clusters':
        network = get_soheil_network(max_edges=max_edges,
                                     node_restriction=nodes)
    else:
        raise Exception('type not implemented: {0}'.format(net_name))

    f = myplots.fignum(1, (8, 8))
    ax = f.add_subplot(111)

    # For every dataset node, the network neighbours that are also in the
    # dataset (empty when the node is absent from the network).
    targets = {}
    for n in nodes:
        if n in network:
            targets[n] = nodes_allowed.intersection(network[n].keys())
        else:
            targets[n] = []

    xax = linspace(-1, 1, 20)
    edges = [(src, dst) for src, dsts in targets.items() for dst in dsts]
    correlations = (corrcoef(values[tf], values[tg])[0, 1]
                    for tf, tg in edges)
    ccofs = [c for c in correlations if not isnan(c)]

    _, kde = make_kde(ccofs)
    ax.hist(ccofs, xax, label=net_name)
    h = histogram(ccofs, xax)
    # Scale the KDE to the tallest histogram bin so both share an axis.
    ax.fill_between(xax, kde(xax) * max(h[0]), label=net_name,
                    zorder=1, alpha=.5)

    myplots.maketitle(
        ax,
        'edge correlations kde for {0}'.format(
            '\n{2} data (data offset={0})\n(net_name={1})\n(max_edges={3})'
            .format(data_ofs, net_name, dataset_name, max_edges)),
        subtitle='n_edges = {0}'.format(len(edges)))
    ax.legend()
    f.savefig(myplots.figpath(
        'network_edge_corrs_data_ofs={0}_net={1}_expr={2}_max_edges={3}'
        .format(data_ofs, net_name, dataset_name, max_edges)))
def run(method='identity', index=0, reset=0, nxmax=100,
        binary_x=False, binary_y=False, expression='time',
        cluster_idx=0, lrn='tree', showall=False, tgonly=False,
        randomize_tfs=False, ctfs=5, ctgs=5, cofs=1,
        do_normalize_cluster=True, cluster_tfs=True,
        verbose_expr_labels=False, ctype=False):
    '''
    Run a selected learning algorithm for one target of a cluster.

    Picks a candidate cluster, assembles TF (x) and target (y) expression
    for it, splits the folded samples into train and test indices, fits
    the learner chosen by ``lrn`` and plots and scores its predictions.

    KEYWORDS:
    method ['identity']:         membership method for ``get_membership``
    cluster_idx [0]:             which target of the cluster is modelled
                                 and held out as the test block
    lrn ['tree']:                'svm', 'knn', 'tree', 'forest' or 'nn'
    binary_x, binary_y [False]:  passed to ``learn_svm``
    tgonly [False]:              train only on the modelled target's own
                                 samples instead of the other targets'
    randomize_tfs [False]:       replace the cluster TFs with the same
                                 number of randomly drawn TFs
    ctfs, ctgs [5]:              passed to ``get_candidates``
    cofs [1]:                    index into the candidate list
    do_normalize_cluster [True]: use ``normalize_cluster_expr`` rather
                                 than ``non_normal_cluster_expr``
    cluster_tfs [True]:          take TFs from the cluster; otherwise use
                                 the parsed network's TFs of the target
    verbose_expr_labels [False]: label the pairwise panels with TF names
    ctype [False]:               test on the last four samples of the
                                 target block instead of its leading ones
    index, reset, nxmax, expression: accepted but not used in this body
    showall:                     accepted, but currently forced to True

    RETURNS:
    [mse, correlation] from ``draw_prediction``; None when lrn == 'nn',
    which only trains and draws a synthetic demonstration network.

    RAISES:
    ValueError for an unrecognised ``lrn``.
    '''
    # --- 1: genes of interest and their index maps ---------------------
    trg_kidxs = nu.net_trg_keyidxs()
    tf_kidxs = nu.net_tf_keyidxs()

    cands = get_candidates(10, ctfs, ctgs)
    cidx = cands[cofs]
    trg_ssnames = get_trg_ss(cluster=cidx)
    if cluster_tfs:
        tf_ssnames = get_tf_ss(cluster=cidx, trgnames=trg_ssnames)
    else:
        tgs, tfs = nu.parse_net()
        tf_ssnames = tgs[trg_ssnames[cluster_idx]]['tfs']

    if randomize_tfs:
        tf_keys = list(tf_kidxs.keys())
        # randint excludes the upper bound, so every draw is a valid index
        # (random_integers included it and could run one past the end).
        picks = np.random.randint(0, len(tf_keys), len(tf_ssnames))
        print('Randomizing TFs')
        tf_ssnames = [tf_keys[i] for i in picks]

    # Not used below; the lookups still fail early on unknown names.
    trg_ssidxs = array([trg_kidxs[name] for name in trg_ssnames])
    tf_ssidxs = array([tf_kidxs[name] for name in tf_ssnames])

    # --- 2: expression data ---------------------------------------------
    # Membership vectors are computed but not yet merged into the data.
    x_memberships = get_membership(tf_ssnames, method=method)
    y_memberships = get_membership(trg_ssnames, method=method)

    if do_normalize_cluster:
        all_expr = normalize_cluster_expr(trg_ssnames, tf_ssnames,
                                          ctype=ctype)
    else:
        all_expr = non_normal_cluster_expr(trg_ssnames, tf_ssnames,
                                           ctype=ctype)
    tg_expr, tf_expr = all_expr
    x_expr = array(tf_expr).T
    y_expr = array(tg_expr).T

    show_clustered_expr(y_expr, x_expr, trg_ssnames, tf_ssnames, fig=8)

    nx, npertg = shape(x_expr)
    x_all, y_all = fold_expr(x_expr, y_expr)
    nx, nt_folded = shape(x_all)

    # --- 3: train / test split ------------------------------------------
    # Folded samples come in blocks of npertg; the modelled target owns
    # block number cluster_idx, and the last four samples of every block
    # form the held-out set used by ctype.
    target_block = list(range(cluster_idx * npertg,
                              (cluster_idx + 1) * npertg))
    target_all = set(target_block)
    target_head = set(target_block[:-4])
    held_out = set(i for i in range(nt_folded)
                   if i % npertg >= npertg - 4)

    train_idxs, test_idxs = [], []
    for i in range(nt_folded):
        if ctype:
            if i in held_out and i in target_all:
                test_idxs.append(i)
        elif i in target_head:
            test_idxs.append(i)

        if tgonly:
            if i in target_head:
                train_idxs.append(i)
        elif i not in target_all and i not in held_out:
            train_idxs.append(i)

    print('N_TRAIN {0}'.format(len(train_idxs)))

    expr_fig = 0
    draw_expr(x_expr, y_expr, expr_fig=expr_fig)

    # --- 4: learning ------------------------------------------------------
    if lrn == 'svm':
        model = learn_svm(x_all, y_all,
                          train_idxs=train_idxs,
                          test_idxs=test_idxs,
                          binary_x=binary_x,
                          binary_y=binary_y)
        predictions = run_svm((x_all.T)[test_idxs].T,
                              y_all[test_idxs], model)
    elif lrn in ['knn', 'tree', 'forest']:
        all_ex = myrf.get_ex(x_all, y_all)
        train_ex = all_ex.getitems([int(x) for x in train_idxs])
        test_ex = all_ex.getitems([int(x) for x in test_idxs])
        model = myrf.OLearn(lrn, train_ex, test_ex=test_ex)
        predictions = model.predictions(test_ex)
    elif lrn == 'nn':
        # Demonstration only: trains on synthetic data, draws the network
        # twice on figure 0 and returns without scoring.
        nhc = 2
        ntg = 2
        ntf_s = 2
        max_tfu = 2
        gf = sf.genfann(nhc, ntg, ntf_s, [max_tfu for i in range(ntg)])
        xs, ys = sf.synth_data(ntg, max_tfu, ntf_s)
        g, ga = gf.sample_genome()
        gf.init_net()
        gf.make_cxns_from_genome(g)
        net = gf.mynn.net

        f = plt.figure(0)
        f.clear()
        ax = f.add_subplot(121)
        myplots.draw_pb(ax, net)
        myplots.hideaxes(ax)
        myplots.maketitle(ax, 'GANN')

        gf.set_data(xs.T, ys.T)
        gf.set_trainer()
        gf.train()

        ax2 = f.add_subplot(122)
        myplots.draw_pb(ax2, net)
        myplots.hideaxes(ax2)
        myplots.maketitle(ax2, 'GANN')
        return
    else:
        raise ValueError('unknown learner: {0}'.format(lrn))

    # --- 5: reporting -----------------------------------------------------
    actual = y_all[test_idxs]

    # NOTE(review): the showall argument is overridden here, so the
    # pairwise plot is always drawn; confirm whether that is intended.
    showall = True
    if showall:
        names = tf_ssnames if verbose_expr_labels else None
        # Keywords keep predictions/actual from being transposed.
        draw_svm(x_all[:, test_idxs], predictions=predictions,
                 actual=actual, f=expr_fig, names=names)

    print(predictions)
    print(actual)

    forstring = 'CL Data' if ctype else 'TS Data'
    namestr = trg_ssnames[cluster_idx]

    if randomize_tfs:
        title = 'Random TF Predictions ' + forstring + ', ' + namestr
        fnum = 5
    else:
        # NOTE(review): these two titles have no space before forstring.
        if cluster_tfs:
            title = ('Network Cluster TF Predictions' + forstring
                     + ', ' + namestr)
        else:
            title = ('Network UnClustered TF Predictions' + forstring
                     + ', ' + namestr)
        fnum = 6

    msecov = draw_prediction(predictions, actual, fig=fnum,
                             title=title,
                             subt=','.join(tf_ssnames))
    print(msecov)
    return msecov