def all_identities(source_ps, odict_fname, source_fasta, target_fasta,
        target_id_dict_fname=None):
    """
    Compute seqs.percent_identity between each source id and the first
    ortholog listed for it.

    source_ps: sized collection of source ids; ids that are not keys of the
        ortholog dict are skipped.
    odict_fname: passed to load_odict; the result is used as a mapping from
        source id to a list of target ids.
    source_fasta, target_fasta: passed to load_seq_dict; the results are
        indexed by source id and by target id respectively.
    target_id_dict_fname: optional file read with ut.load_lot. Column 0 of
        each row is mapped to column 2, and every target id in the ortholog
        dict is translated through that mapping before any lookup.

    Returns the list of identities, in source_ps order. Pairs whose
    comparison raises are reported on stdout and left out, so the list can
    be shorter than the number of pairs.
    """
    odict = load_odict(odict_fname)
    if target_id_dict_fname is not None:
        tid_dict = dict((row[0], row[2])
                for row in ut.load_lot(target_id_dict_fname))
        odict = dict((k, [tid_dict[v] for v in vs])
                for k, vs in odict.items())
    dsource = load_seq_dict(source_fasta)
    dtarget = load_seq_dict(target_fasta)
    # Only the first ortholog of each source id is compared.
    pairs = [(s, odict[s][0]) for s in source_ps if s in odict]
    # Each message is formatted into one string so the output is the same
    # whether print is parsed as a statement or called as a function.
    print("%s of %s with orthologs--getting identities" % (len(pairs),
            len(source_ps)))
    idents = []
    for s, t in pairs:
        try:
            idents.append(seqs.percent_identity(dsource[s], dtarget[t]))
        except decorators.TimeoutError as ex:
            print("timeout for %s %s %s" % (s, t, ex.args))
        except Exception as ex:
            # Kept broad, as before: a failing pair is reported and the loop
            # moves on to the next pair.
            print("unknown error for %s %s %s" % (s, t, ex.args))
    return idents
# Example #2
0
def exported_diff(cy_basefile, cy_difffile, col_header, diff_ppis=None,
        justids=False):
    """
    Makes a new cy_ file labeling whether that interaction is also found in the
    cy_difffile (or the diff_ppis--pass None for cy_difffile in that case).

    cy_basefile: file read with ut.load_lol; its first row is the header and
        the remaining rows are the interactions to label.
    cy_difffile: file read with ut.load_lot (first row skipped) whose rows
        are converted to id pairs, or None to use diff_ppis instead.
    col_header: name of the appended column; also passed with cy_basefile to
        ut.pre_ext to obtain the output filename.
    diff_ppis: pairs handed directly to pd.PairDict when cy_difffile is None.
    justids: if true, only the first two columns of the header and of every
        row are kept before the new column is appended.
    """
    def cy_ppi_to_pair(p):
        # Keep the piece after the first '_' separator of each of the first
        # two fields.
        return (p[0].split('_')[1], p[1].split('_')[1])

    if cy_difffile is not None:
        pd_diff = pd.PairDict([cy_ppi_to_pair(p)
            for p in ut.load_lot(cy_difffile)[1:]])
    else:
        pd_diff = pd.PairDict(diff_ppis)
    # Read the base file once and split it, instead of loading it twice.
    rows = ut.load_lol(cy_basefile)
    header, lines = rows[0], rows[1:]
    if justids:
        lines = [l[:2] for l in lines]
        header = header[:2]
    header = header + [col_header]
    labeled = [r + [pd_diff.contains(cy_ppi_to_pair(r))] for r in lines]
    ut.write_tab_file(labeled, ut.pre_ext(cy_basefile, col_header),
            header=header)
# Example #3
0
def load_hpa_localization(fname='./enrichment_datasets/subcellular_location.csv'):
    """
    Read a comma-separated file with ut.load_lot and return a list of
    (field 0, field 1) tuples for the rows whose field 4 is "Supportive".

    The first row is skipped, and surrounding double quotes are stripped from
    every field before the comparison.
    NOTE(review): the meaning of fields 0, 1 and 4 is not visible here --
    presumably gene id, location and reliability; confirm against the file.
    """
    rows = ut.load_lot(fname, sep=",")
    supported = []
    for raw in rows[1:]:
        fields = [field.strip("\"") for field in raw]
        if fields[4] == "Supportive":
            supported.append((fields[0], fields[1]))
    return supported