Example #1
def import_ints(cyto_ppis_fname):
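    # Drop the header row and keep the first four columns of each line: id1, id2, prob, corum label.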
    cyto_ppis = [p[:4] for p in ut.load_lol(cyto_ppis_fname)[1:]]
    ppis_dupes = [(id1.split('_')[1],id2.split('_')[1],prob,
        1 if corum in set(['gold','train']) else 0) 
        for id1,id2,prob,corum in cyto_ppis]
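    # Collapse duplicate (unordered) pairs through a PairDict, then convert back to a list of lists.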
    cxppis = pd.pd_lol(pd.PairDict(ppis_dupes))
    return cxppis
Example #2
def load_reactome_pairs(fname='reactome/homo_sapiens.interactions.txt'):
    rmlol = ut.load_lol(fname)
    rmall = pd.PairDict([])
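    # Columns 1 and 4 hold colon-prefixed protein ids; column 6 is the interaction type.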
    pd.pd_set_loi_sets(rmall, [[x[1].split(':')[1], x[4].split(':')[1], x[6]]
        for x in rmlol[1:] if x[1] != '' and x[4] != ''])
    rmfilt = [(k,v) for k,v in rmall.d.items() if ("reaction" in v) and not ("indirect_complex" in v) and not ("direct_complex" in v)]
    rmfiltpairs = [k for k,v in rmfilt]    
    return rmfiltpairs
Example #3
def multi_identities(input_fname, out_dir):
    input_list = ut.load_lol(input_fname)
    for desc, prots_fname, source_fasta, odict, target in input_list:
        print "%s, proteins: %s\n source: %s\n odict: %s\ntarget: %s" % (desc,
                prots_fname, source_fasta, odict, target)
        prots = ut.load_list(prots_fname)
        sims = all_identities(prots, odict, source_fasta, target)
        out_fname = os.path.join(out_dir,
                ut.shortname(target).split('.')[0] + "_" + desc + ".txt")
        ut.write_tab_file(sims, out_fname, islist=True)
Example #4
def load_pepcount(f):
    lol = ut.load_lol(f)
    print "Omitting header:", lol[0]
    lol = lol[1:]
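    # After dropping the top header, the first remaining row carries the sample names
    # (columns 2 and up); later rows have the peptide id in column 0 and counts from column 2 on.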
    peps = ut.i0(lol[1:])
    samples = lol[0][2:]
    arr = np.zeros((len(peps), len(samples)))
    for i,row in enumerate(lol[1:]):
        arr[i,:] = row[2:]
    return peps, samples, arr
Example #5
def exported_diff(cy_basefile, cy_difffile, col_header, diff_ppis=None,
        justids=False):
    """
    Makes a new cy_ file labeling whether that interaction is also found in the
    cy_difffile (or the diff_ppis--pass None for cy_difffile in that case).
    """
    def cy_ppi_to_pair(p):
        return (p[0].split('_')[1], p[1].split('_')[1])
    if cy_difffile is not None:
        pd_diff = pd.PairDict([cy_ppi_to_pair(p)
            for p in ut.load_lol(cy_difffile)[1:]])
    else:
        pd_diff = pd.PairDict(diff_ppis)
    # Load the base export once; the first row is the header, the rest are interactions.
    base_lines = ut.load_lol(cy_basefile)
    header, lines = base_lines[0], base_lines[1:]
    if justids:
        lines = [l[:2] for l in lines]
        header = header[:2]
    header += [col_header]
    ut.write_tab_file([r + [pd_diff.contains(cy_ppi_to_pair(r))] for r in
        lines], ut.pre_ext(cy_basefile, col_header), header=header)
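A minimal usage sketch for exported_diff, assuming cy_ export files in the format handled above; the filenames, column label, and pair ids here are hypothetical:
# Label every interaction in cy_base.txt by whether it also appears in cy_other.txt;
# the output path comes from ut.pre_ext(cy_basefile, col_header).
exported_diff('cy_base.txt', 'cy_other.txt', 'in_other')
# Or compare against an in-memory pair list instead of a second cy_ file:
exported_diff('cy_base.txt', None, 'in_list', diff_ppis=[('ENSG1', 'ENSG2')])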
Example #6
def ensg_to_ensp_and_park(ppips):
    dhpg = seqs.prots2genes('/Users/blakeweb/Dropbox/complex/data/sequences/canon/Hs.fasta')
    dhgp = ut.dict_inverse(dhpg)
    parkids = ut.load_lol('./orth_similarities/table.Hsapiens/Hsapiens_id.txt')
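    # parkids table columns, as used below: Park id, ENSP protein id, ENSG gene id.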
    ppips_ensp = [dhgp[g] for g in ppips]
    dg2park = dict([(x[2],x[0]) for x in parkids])
    dp2park = dict([(x[1],x[0]) for x in parkids])
    park_ppips_most = [dp2park[p] for p in ppips_ensp if p in dp2park]
    ppips_ensp_rest = [p for p in ppips_ensp if p not in dp2park]
    ppips_ensg_rest = [dhpg[p] for p in ppips_ensp_rest]
    park_ppips_rest = [dg2park[p] for p in ppips_ensg_rest if p in dg2park]
    park_ppips = park_ppips_most + park_ppips_rest
    return park_ppips
Example #7
def load_reactome_pairs_reactions(fname='reactome/homo_sapiens.interactions.txt'):
    rmlol = ut.load_lol(fname)
    rmpd = pd.PairDict([])
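    # Same parsing as load_reactome_pairs, but keep only rows whose interaction type
    # (column 6) is 'reaction', and store column 7 as each pair's value.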
    pd.pd_set_loi_sets(rmpd, [[x[1].split(':')[1], x[4].split(':')[1], x[7]]
        for x in rmlol[1:] if x[1] != '' and x[4] != '' and x[6] == 'reaction'])
    return rmpd
Example #8
def entrez_desc():
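    # Map gene ids (column 4, first two characters stripped) to the description
    # text before any '[' in column 2 of the configured gene_desc_Hs table.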
    return dict([(l[4][2:],l[2].split('[')[0]) for l in
        ut.load_lol(ut.config('gene_desc_Hs'))])
Example #9
def load_ppis(fname):
    return ut.load_lol(fname, dtypes=(str,str,float,int))
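A hypothetical call for reference, assuming a tab-separated file whose rows match the four dtypes above (two string ids, a float probability, an integer label):
ppis = load_ppis('ppis_scored.txt')  # hypothetical filename
id1, id2, prob, label = ppis[0]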