示例#1
0
def pairs_notfound_sps(df, fs, sps=None):
    """
    Per species, tally the pairs whose evidence is not "frac".

    df: dataframe with id1, id2, and the sp_evidence columns (one
        "<sp>_evidence" column is read for every species in sps).
    fs: all the elution filenames; a filename belongs to a species when it
        contains the substring "<sp>_".
    sps: species codes to process.  Defaults to Hs, Mm, Sp, Dm, Ce.

    Return (sps, results).  results has one tuple per species:
    (number of non-"frac" pairs,
     pairs_found(pairs, odict) -- or the number of pairs if odict is empty,
     pairs_orth_found(pairs, odict, allps)).
    """
    if sps is None:
        # Build the default on every call: sps is handed back to the caller,
        # so a shared default list could be mutated between calls.
        sps = "Hs Mm Sp Dm Ce".split()
    results = []
    for sp in sps:
        not_frac = df[df[sp + "_evidence"] != "frac"]
        pairs = [(r["id1"], r["id2"]) for _, r in not_frac.iterrows()]
        # Single-argument parenthesized print behaves the same as the print
        # statement on Python 2 and also parses on Python 3.
        print("%s pairs for %s" % (len(pairs), sp))
        odict = orth.odict("Hs", sp)
        # A falsy odict is treated as "same species" (presumably Hs vs Hs --
        # confirm against orth.odict): every pair is counted.
        orths = pairs_found(pairs, odict) if odict else len(pairs)
        fs_sp = [f for f in fs if (sp + "_") in f]
        print("%s fractionations for %s" % (len(fs_sp), sp))
        allps = el.all_prots(fs_sp)
        counts = pairs_orth_found(pairs, odict, allps)
        results.append((len(pairs), orths, counts))
    return sps, results
def prot_counts(fs, min_count=2):
    """
    Accumulate each protein's normalized share of spectral counts over a set
    of fractionations.

    For every fractionation in fs, a protein's counts are summed across the
    row of the elution matrix, divided by the total counts of that
    fractionation, and divided by len(fs).  These per-fractionation shares
    are added up per protein.
    Filtered to the proteins returned by el.all_prots(fs, min_count=min_count)
    (per the original note: any returned protein will have at least min_count
    counts in one fraction of one of the fractionations).
    Return a dict (collections.defaultdict(float)):
    {prot1:share1, prot2:share2, ...}
    """
    allprots = el.all_prots(fs, min_count=min_count)
    pcounts = collections.defaultdict(float)
    n_fs = len(fs)
    for f in fs:
        e = el.load_elution(f)
        # One total per matrix row; rows are paired with e.prots below.
        psums = np.sum(np.array(e.mat), axis=1)
        frac_sum = psums.sum()
        if frac_sum == 0:
            # A fractionation with no counts contributes nothing; skipping it
            # avoids a division by zero that would yield inf/NaN shares.
            continue
        # 1.0 forces true division: with integer counts on Python 2,
        # 1 / (int * int) would floor to 0 and zero out every share.
        norm_term = 1.0 / (frac_sum * n_fs)
        for p, psum in zip(e.prots, psums):
            if p in allprots:
                pcounts[p] += psum * norm_term
    return pcounts
def manysp_all_prots(sps, elutfs, **kwargs):
    """
    Call el.all_prots once per base species and collect the results.

    sps: iterable of species identifiers; each is passed as sp_base.
    elutfs: elution filenames handed to el.all_prots for every species.
    kwargs: forwarded unchanged to el.all_prots.
    Return a dict: {sp1:all_prots_result1, sp2:all_prots_result2, ...}
    """
    return {
        base: el.all_prots(elutfs, sp_base=base, **kwargs)
        for base in sps
    }