예제 #1
0
def multi_clust(tested, score_cutoffs=None, length_cutoffs=None,
        fracs=[.012,.014], frac_retain=.1, ds=[.1,.25,.3,.35], ms=[.1,.15,.2],
        penalties=[.1,1], overlaps=[.55], haircuts=[0,.2], max_pval=1,
        savef=None, runid=None, show_stats=True, pres=None, gold_nspecies=1,
        gold_splits=None, gold_minlen=3, mdprod_min=.01, **kwargs):
    """Run cl.filter_clust once per combination of a grid of parameters.

    Every combination of (frac, d, m, penalty, overlap, haircut) taken from
    `fracs`, `ds`, `ms`, `penalties`, `overlaps` and `haircuts` is tried, except
    those whose product d*m falls below `mdprod_min`.

    Arguments:
        tested: sequence that is subset with ut.list_frac before clustering.
        score_cutoffs: used only when `fracs` is None; each cutoff s becomes
            the fraction cl.n_thresh(tested, s) / len(tested).
        length_cutoffs: used only when `fracs` and `score_cutoffs` are both
            None; each value le becomes the fraction le / len(tested).
        fracs: fractions of `tested` handed to the clustering step.  The
            default is not None, so the two cutoff arguments are only
            consulted when the caller passes fracs=None explicitly.
        frac_retain: fraction of `tested` passed as the second argument of
            cl.filter_clust.
        ds, ms, penalties, haircuts: values forwarded as min_density, negmult,
            penalty and haircut respectively.
        overlaps: each value is forwarded as BOTH merge_cutoff and
            max_overlap.
        max_pval: forwarded unchanged to cl.filter_clust.
        savef: if truthy, intermediate results are saved with ut.savepy after
            the 1st, 11th, 21st, ... clustering.
        runid: identifier for this run; a random one in [1, 1000) is drawn
            when it is falsy (None or 0).
        show_stats: if true, cp.result_stats/cp.select_best are invoked for
            each non-empty clustering (requires `pres` and `gold_splits`).
        pres, gold_splits, gold_nspecies, gold_minlen: inputs to
            cp.result_stats.
        mdprod_min: minimum allowed value of d*m for a combination to run.
        **kwargs: forwarded to cl.filter_clust.

    Returns:
        (clusts, runid): the list of clustering results, each tagged with a
        `params` description string, and the run id that was used.

    Raises:
        ValueError: if `fracs`, `score_cutoffs` and `length_cutoffs` are all
            None.
    """
    # The list defaults above are only read, never mutated, so sharing them
    # between calls is harmless.
    runid = runid or random.randrange(1,1000)
    if fracs is None:
        # float() forces true division; under Python 2 int/int would floor
        # these fractions (typically to 0) unless the module enables
        # `from __future__ import division` -- not visible from here.
        n_tested = float(len(tested))
        if score_cutoffs is not None:
            fracs = [cl.n_thresh(tested, s) / n_tested for s in score_cutoffs]
        elif length_cutoffs is not None:
            fracs = [le / n_tested for le in length_cutoffs]
        else:
            raise ValueError("fracs is None, so score_cutoffs or "
                    "length_cutoffs must be provided")
    # Single-argument print(...) produces the same output on Python 2 and 3.
    print("random id: %s" % (runid,))
    clusts = []
    can_show_stats = pres is not None and gold_splits is not None
    for (f,d,m,p,o,h) in it.product(fracs, ds, ms, penalties, overlaps,
            haircuts):
        if d*m < mdprod_min:
            continue
        cxstruct = cl.filter_clust(ut.list_frac(tested, f),
                ut.list_frac(tested, frac_retain), merge_cutoff=o, negmult=m,
                min_density=d, runid=runid, penalty=p, max_pval=max_pval,
                max_overlap=o, haircut=h, **kwargs)
        # Record the parameters on the result so it can be identified later.
        cxstruct.params = ('density=%s,frac=%s,f_retain=%s,negmult=%s,penalty=%s,max_overlap=%s,haircut=%s' % (d,f,frac_retain,m,p,o,h))
        clusts.append(cxstruct)
        if show_stats and len(cxstruct.cxs)>0:
            if can_show_stats:
                # Return value is unused; the call is kept as in the original
                # (presumably it reports the stats itself -- TODO confirm).
                cp.select_best(cp.result_stats(pres.species, gold_splits,
                    clusts[-1:], gold_nspecies, min_gold_size=gold_minlen))
            else:
                print("Can't show stats: pres and gold_splits required.")
        # Checkpoint after the 1st, 11th, 21st, ... result.
        if savef and (len(clusts) % 10 == 1):
            ut.savepy(clusts, ut.pre_ext(savef, "clusts_temp_%s_%s" % (ut.date(),
                runid)))
    return clusts, runid
예제 #2
0
def multi_stage2_clust(clusts, ppis_all, runid=None, frac_retain=.1,
        I_params=[2,3,4], **kwargs):
    """Apply cl.stage2_clust to every first-stage result for every I value.

    Arguments:
        clusts: iterable of first-stage results; each must expose `cxppis`,
            `cxs` and `params`.
        ppis_all: sequence subset with ut.list_frac(ppis_all, frac_retain)
            before each second-stage run.
        runid: identifier forwarded to cl.stage2_clust; a random one in
            [1, 1000) is drawn when it is falsy.
        frac_retain: fraction passed to ut.list_frac.
        I_params: values forwarded one at a time as the `I` keyword
            (only read here, never mutated).
        **kwargs: forwarded to cl.stage2_clust.

    Returns:
        List of second-stage results, ordered by first-stage result and then
        by I value.  Each carries a `params` string made of the first-stage
        params with ",I_mcl=<I>" appended.
    """
    if not runid:
        runid = random.randrange(1,1000)
    second_stage = []
    for first in clusts:
        for i_val in I_params:
            # The retained subset is rebuilt for every run, so each
            # stage2_clust call receives its own ut.list_frac result.
            retained = ut.list_frac(ppis_all, frac_retain)
            refined = cl.stage2_clust(first.cxppis, retained, first.cxs,
                    runid=runid, I=i_val, **kwargs)
            suffix = ",I_mcl=%s" % i_val
            refined.params = first.params + suffix
            second_stage.append(refined)
    return second_stage
예제 #3
0
def rescue_ppis(pres, obs, n_rescue, cutoff_fracs=20, cutoff_score=0.9,
        exclude_ppis=None, frac_retain=0.1, cltype='mcl', merge_cutoff=0.55,
        I=3, negmult=1):
    """Select additional PPIs to rescue and re-cluster with them.

    The first `n_rescue` entries of pres.ppis are passed through
    ev.ppis_fracspassing_counts; an entry p is rescued when p[-1] exceeds
    `cutoff_fracs` or p[2] exceeds `cutoff_score`.  Each rescued entry is
    also turned into a two-member set {p[0], p[1]} and, together with
    pres.cxs, handed to cl.filter_clust as `add_to_cxs`.

    Arguments:
        pres: object exposing `ppis`, `cxs` and `cxppis`.
        obs: forwarded to ev.ppis_fracspassing_counts.
        n_rescue: number of leading entries of pres.ppis to consider.
        cutoff_fracs: threshold compared against the last field of an entry.
        cutoff_score: threshold compared against field 2 of an entry.
        exclude_ppis: PPIs to exclude from consideration; pres.cxppis is used
            when this is falsy (None or empty).
        frac_retain: fraction of pres.ppis passed (via ut.list_frac) as the
            second argument of cl.filter_clust.
        cltype, merge_cutoff, I, negmult: forwarded to cl.filter_clust.

    Returns:
        (cxs, cxppis, ppis_rescue): the `cxs` and `cxppis` of the new
        clustering result, and the list of rescued entries.
    """
    ppis_counted = ev.ppis_fracspassing_counts(pres.ppis[:n_rescue], obs,
            exclude_ppis=(exclude_ppis or pres.cxppis))
    # Single-argument print(...) produces the same output on Python 2 and 3.
    print("%s PPIs considered for possible rescue" % len(ppis_counted))
    # One pass collects both the rescued entries and the per-criterion counts.
    ppis_rescue = []
    n_over_fracs = 0
    n_over_score = 0
    for p in ppis_counted:
        over_fracs = p[-1] > cutoff_fracs
        over_score = p[2] > cutoff_score
        if over_fracs:
            n_over_fracs += 1
        if over_score:
            n_over_score += 1
        if over_fracs or over_score:
            ppis_rescue.append(p)
    # Fix: the counts were previously supplied in the opposite order to the
    # labels in this message.
    print("%s ppis exceeding score cutoff, %s exceeding fracs cutoff" % (
        n_over_score, n_over_fracs))
    print("%s PPIs to be rescued" % len(ppis_rescue))
    ppis_rescue_as_cxs = [set((p[0],p[1])) for p in ppis_rescue]
    additional_cxs = pres.cxs + ppis_rescue_as_cxs
    # Fix: honour the caller's `cltype` instead of a hard-coded 'mcl'
    # (the default is still 'mcl', so existing callers are unaffected).
    cxstruct_rescue = cl.filter_clust(ppis_rescue, ut.list_frac(pres.ppis,
        frac_retain), cltype=cltype, merge_cutoff=merge_cutoff, I=I,
        negmult=negmult, add_to_cxs=additional_cxs)
    print("Total complexes >= size 3: was %s, now %s" % (len([c for c in
        pres.cxs if len(c)>=3]), len([c for c in cxstruct_rescue.cxs if len(c)>=3])))
    return cxstruct_rescue.cxs, cxstruct_rescue.cxppis, ppis_rescue