def triple_venn_consv():
    """Plot a three-way Venn diagram comparing conserved interaction sets:
    the map23 cluster-derived ppis, the havug interactions, and pairs
    expanded from the corum training complexes.
    """
    hints = co.load_havug_ints()
    ppi_cxs, clean_cxs, corconsv = ppi.load_training_complexes("Hs", "Dm")
    cints = co.pairs_from_complexes(ut.i1(ppi_cxs)) # exclude huge ones
    ints23 = ut.loadpy(ut.bigd("../23_collapsenodes/Hs_filtorth025_withsc_2sp_refilt2sp_cxs_cxppis_clust27_532cxs"))[1]
    # NOTE(review): h2d is not defined in this function nor visible at module
    # scope in this chunk -- presumably a human->fly ortholog mapping loaded
    # elsewhere. Confirm before running; as written this raises NameError.
    ints3 = [cp.consv_pairs(i, h2d) for i in ints23, hints, cints]
    cp.triple_venn(ints3, ["map23", "havug", "corum"])
def compact(d, scoref, precision=3): sys.path.append(d+'/..') import utils as ut compactf = '%s.p%s.txt.gz' % (scoref, precision) print compactf, precision ascores = (ut.loadpy(scoref) if scoref.endswith('.py') else np.loadtxt(scoref)) formatstring = '%' + '0.%se' % precision np.savetxt(compactf, ascores, fmt=formatstring, delimiter='\t')
def precalc_scores(scoref, dtype='f2'): """ Also zero out the diagonal to more efficiently remove all self-interactions up-front. """ # NOTE to change dtype you must change it in loadtxt below!! save_compact = ut.config()['save_compact_corrs'] compactf = '%s.%s.pyd' % (scoref, dtype) if os.path.exists(compactf): mat = ut.loadpy(compactf) inds = range(mat.shape[0]) # always square score matrix mat[inds, inds] = 0 return mat else: ascores = np.loadtxt(scoref, dtype='f2') if save_compact: print 'saving compact', compactf ut.savepy(ascores, compactf) return ascores
# Plot the feature importances of the trees and of the forest if do_plot: import pylab as pl pl.figure() pl.title("Feature importances") for tree in forest.estimators_: pl.plot(indnums, tree.feature_importances_[indices], "r") pl.plot(indnums, importances[indices], "b") pl.show() feats, weights = zip(*ranked) return list(feats), list(weights) if __name__ == '__main__': if len(sys.argv) < 4: sys.exit("usage: python ml.py train_test feats_f clf_type \ donorm kwarg1_val1-kwarg2-val2") ttf = sys.argv[1] tt = np.load(ttf) feats = ut.loadpy(sys.argv[2]) k = sys.argv[3] do_norm = sys.argv[4] kvs = sys.argv[5] kwargs = dict([tuple(kv.split('_')) for kv in kvs.split('-')]) \ if kvs else {} clf = tree(**kwargs) if k=='tree' else svm(kernel=k, **kwargs) ts = [('%s features, %s kernel, norm: %s, %s' %(n,k,do_norm, kvs), fit_and_test([fe.keep_cols(t, ut.i0(feats[:n])) for t in tt], clf, norm=do_norm)) for n in 20,30,40,50] ut.savepy(ts, 'ts_%s_%s_%s_%s' %(k,do_norm,kvs,ttf))
        if len([1 for m in maxes[i] if m > cutoff]) >= nsp]
    # Keep only rows whose per-species maxima exceed cutoff in >= nsp species.
    arrfilt = arr[exceed_inds]
    if do_counts:
        # Recompute the passing-species count per original row, then keep
        # the counts for the rows that survived the filter, in order.
        orig_sp_counts = [len([1 for m in maxes[i] if m > cutoff])
                for i in range(len(arr))]
        sp_counts_filt = [c for c in orig_sp_counts if c >=nsp]
        # PairDict keyed on the (id1, id2) pair columns, valued with the
        # species count. NOTE(review): `pd` here is the project's pairdict
        # module, not pandas -- confirm against the file's imports.
        pd_spcounts = pd.PairDict([[arrfilt[i][0],arrfilt[i][1],
            sp_counts_filt[i]] for i in range(len(arrfilt))])
    else:
        pd_spcounts = None
    return arrfilt, pd_spcounts

def filter_nsp_nocounts(arr, **kwargs):
    # Convenience wrapper: same filtering as filter_nsp but discards the
    # per-pair species-count PairDict, returning only the filtered array.
    return filter_nsp(arr, do_counts=False, **kwargs)[0]

if __name__ == '__main__':
    # Classify one slice of the example set: slice i covers rows
    # [i*perslice, (i+1)*perslice), and results are saved under
    # path + str(i) + '_'.
    nargs = len(sys.argv)
    if nargs < 6:
        sys.exit("usage: python myml.py f_examples f_classifier \
perslice islice path")
    exs = np.load(sys.argv[1])
    clf = ut.loadpy(sys.argv[2])
    perslice = int(sys.argv[3])
    i = int(sys.argv[4])
    path = sys.argv[5]
    exs_slice = exs[i*perslice:(i+1)*perslice]
    # Free the full example array before classifying to limit peak memory.
    del exs
    classify_slice(clf, exs_slice, 100000, savef=(path+str(i)+'_'),
            maintain=False, startslice=0)