Пример #1
0
def triple_venn_consv():
    """
    Run three interaction sets through cp.consv_pairs and hand the results
    to cp.triple_venn.

    The sets, in order, are labelled "map23" (element 1 of the saved
    clustering result), "havug" (co.load_havug_ints()) and "corum" (pairs
    built from the first value returned by ppi.load_training_complexes).
    """
    # NOTE(review): h2d is not defined in this function; presumably a
    # module-level object -- confirm it exists before calling.
    havug_ints = co.load_havug_ints()
    ppi_cxs, _clean_cxs, _corconsv = ppi.load_training_complexes("Hs", "Dm")
    corum_ints = co.pairs_from_complexes(ut.i1(ppi_cxs))  # exclude huge ones
    map23_path = ut.bigd("../23_collapsenodes/Hs_filtorth025_withsc_2sp_refilt2sp_cxs_cxppis_clust27_532cxs")
    map23_ints = ut.loadpy(map23_path)[1]
    labelled = [("map23", map23_ints), ("havug", havug_ints),
                ("corum", corum_ints)]
    consv_sets = [cp.consv_pairs(ints, h2d) for _label, ints in labelled]
    cp.triple_venn(consv_sets, [label for label, _ints in labelled])
def compact(d, scoref, precision=3):
    """
    Re-save the scores in `scoref` as tab-delimited text in exponent
    notation, keeping `precision` digits after the decimal point.

    d: directory; its parent ('<d>/..') is appended to sys.path so that the
        `utils` module can be imported.
    scoref: path of the scores. Read with ut.loadpy when the name ends in
        '.py', otherwise with np.loadtxt.
    precision: digits after the decimal point in the written values.

    The output path is '<scoref>.p<precision>.txt.gz'; it is printed
    together with the precision before any data is read.
    """
    sys.path.append(d+'/..')
    import utils as ut
    outf = '%s.p%s.txt.gz' % (scoref, precision)
    print('%s %s' % (outf, precision))
    if scoref.endswith('.py'):
        scores = ut.loadpy(scoref)
    else:
        scores = np.loadtxt(scoref)
    # e.g. precision=3 gives the format '%0.3e'
    value_format = '%' + '0.%se' % precision
    np.savetxt(outf, scores, fmt=value_format, delimiter='\t')
Пример #3
0
def precalc_scores(scoref, dtype='f2'):
    """
    Load the score matrix for `scoref` and zero out its diagonal, to more
    efficiently remove all self-interactions up-front.

    If a compact copy named '<scoref>.<dtype>.pyd' exists it is loaded with
    ut.loadpy.  Otherwise the text file `scoref` is parsed with np.loadtxt
    using `dtype`, and, when the 'save_compact_corrs' config entry is
    truthy, the parsed array is saved to the compact path for next time.

    scoref: path of the text score matrix (expected to be square).
    dtype: numpy dtype used to parse the text file; also part of the
        compact file's name.

    Returns the matrix with zeros on its diagonal, whichever path loaded it.
    """
    save_compact = ut.config()['save_compact_corrs']
    compactf = '%s.%s.pyd' % (scoref, dtype)
    if os.path.exists(compactf):
        mat = ut.loadpy(compactf)
    else:
        # Parse with the requested dtype so the data agrees with the dtype
        # embedded in the compact file's name.
        mat = np.loadtxt(scoref, dtype=dtype)
        if save_compact:
            print('saving compact %s' % compactf)
            ut.savepy(mat, compactf)
    # Zero the diagonal on both paths so the first (uncached) call returns
    # the same matrix as later cached calls.  A single-value file parses to
    # a 0-d array, which has no diagonal to fill.
    if mat.ndim >= 2:
        np.fill_diagonal(mat, 0)
    return mat
Пример #4
0
def precalc_scores(scoref, dtype='f2'):
    """
    Load the score matrix for `scoref` and zero out its diagonal, to more
    efficiently remove all self-interactions up-front.

    If a compact copy named '<scoref>.<dtype>.pyd' exists it is loaded with
    ut.loadpy.  Otherwise the text file `scoref` is parsed with np.loadtxt
    using `dtype`, and, when the 'save_compact_corrs' config entry is
    truthy, the parsed array is saved to the compact path for next time.

    scoref: path of the text score matrix (expected to be square).
    dtype: numpy dtype used to parse the text file; also part of the
        compact file's name.

    Returns the matrix with zeros on its diagonal, whichever path loaded it.
    """
    save_compact = ut.config()['save_compact_corrs']
    compactf = '%s.%s.pyd' % (scoref, dtype)
    if os.path.exists(compactf):
        mat = ut.loadpy(compactf)
    else:
        # Parse with the requested dtype so the data agrees with the dtype
        # embedded in the compact file's name.
        mat = np.loadtxt(scoref, dtype=dtype)
        if save_compact:
            print('saving compact %s' % compactf)
            ut.savepy(mat, compactf)
    # Zero the diagonal on both paths so the first (uncached) call returns
    # the same matrix as later cached calls.  A single-value file parses to
    # a 0-d array, which has no diagonal to fill.
    if mat.ndim >= 2:
        np.fill_diagonal(mat, 0)
    return mat
Пример #5
0
    # Plot the feature importances of the trees and of the forest
    if do_plot:
        import pylab as pl
        pl.figure()
        pl.title("Feature importances")
        for tree in forest.estimators_:
            pl.plot(indnums, tree.feature_importances_[indices], "r")
        pl.plot(indnums, importances[indices], "b")
        pl.show()
    feats, weights = zip(*ranked)
    return list(feats), list(weights)

if __name__ == '__main__':
    # Command line: train_test feats_f clf_type donorm [kwargs]
    # Everything up to donorm is read unconditionally below, so require it;
    # the trailing kwargs string may be omitted (same as passing '').
    if len(sys.argv) < 5:
        sys.exit("usage: python ml.py train_test feats_f clf_type "
                 "donorm [kwarg1_val1-kwarg2_val2]")
    ttf = sys.argv[1]
    tt = np.load(ttf)
    feats = ut.loadpy(sys.argv[2])
    k = sys.argv[3]
    # NOTE(review): do_norm stays a string, so any non-empty value (even
    # "False") is truthy unless fit_and_test parses it -- confirm.
    do_norm = sys.argv[4]
    kvs = sys.argv[5] if len(sys.argv) > 5 else ''
    # 'a_1-b_2' -> {'a': '1', 'b': '2'}; values are left as strings.
    kwargs = dict([tuple(kv.split('_')) for kv in kvs.split('-')]) \
        if kvs else {}
    clf = tree(**kwargs) if k == 'tree' else svm(kernel=k, **kwargs)
    # One (description, result) pair per feature count, each fitted on the
    # first n entries of feats.
    ts = [('%s features, %s kernel, norm: %s, %s' % (n, k, do_norm, kvs),
           fit_and_test([fe.keep_cols(t, ut.i0(feats[:n])) for t in tt],
                        clf, norm=do_norm))
          for n in (20, 30, 40, 50)]
    ut.savepy(ts, 'ts_%s_%s_%s_%s' % (k, do_norm, kvs, ttf))
Пример #6
0
                   if len([1 for m in maxes[i] if m > cutoff]) >= nsp]
    arrfilt = arr[exceed_inds]
    if do_counts:
        orig_sp_counts = [len([1 for m in maxes[i] if m > cutoff]) for i in
                range(len(arr))]
        sp_counts_filt = [c for c in orig_sp_counts if c >=nsp]
        pd_spcounts = pd.PairDict([[arrfilt[i][0],arrfilt[i][1],
            sp_counts_filt[i]] 
            for i in range(len(arrfilt))])
    else:
        pd_spcounts = None
    return arrfilt, pd_spcounts

def filter_nsp_nocounts(arr, **kwargs):
    """
    Call filter_nsp with do_counts=False and return only the first element
    of its result; all other keyword arguments are forwarded unchanged.
    """
    result = filter_nsp(arr, do_counts=False, **kwargs)
    return result[0]

if __name__ == '__main__':
    # Command line: f_examples f_classifier perslice islice path
    # Runs classify_slice on slice number `islice` of the examples, where
    # each slice holds `perslice` examples.
    nargs = len(sys.argv)
    if nargs < 6:
        sys.exit("usage: python myml.py f_examples f_classifier \
               perslice islice path")
    exs = np.load(sys.argv[1])
    clf = ut.loadpy(sys.argv[2])
    perslice, i = int(sys.argv[3]), int(sys.argv[4])
    path = sys.argv[5]
    start = i * perslice
    exs_slice = exs[start:start + perslice]
    # Drop the name bound to the full example array before classifying.
    del exs
    # Output prefix is the path followed by the slice number and '_'.
    classify_slice(clf, exs_slice, 100000, maintain=False, startslice=0,
                   savef=(path + str(i) + '_'))