示例#1
0
def abundance_dict(species):
    """Map each protein ID to its ``(relative abundance, def)`` pair.

    The table for `species` is read with ``utils.load_ned_data`` and the
    columns are located by name: 'proID' supplies the key, 'rel.abun' is
    converted to ``float`` and 'def' is passed through unchanged.

    Note the tuple order is (abundance, def), abundance first.
    NOTE(review): 'def' presumably holds the decay classification - confirm.
    NOTE(review): test_load_ned_data checks for a 'Rel. abund' column while
    this function looks up 'rel.abun' - confirm both names exist in the
    header.

    Raises:
        ValueError: if one of the three column names is missing from the
            header, or an abundance value cannot be converted to float.
    """
    columns, rows = utils.load_ned_data(species)
    id_col, abund_col, def_col = (
        columns.index(name) for name in ('proID', 'rel.abun', 'def')
    )
    result = {}
    for row in rows:
        result[row[id_col]] = (float(row[abund_col]), row[def_col])
    return result
def NED_core_interactor_test(protein_subset=None):
    """Print how NED and ED proteins split between core and non-core rows.

    Proteins are classified from the human table returned by
    ``utils.load_ned_data``: column -3 holds the label ('NED' or 'ED') and
    column -2 the protein identifier.  Each row from ``load_hein()`` is then
    assigned to the NED group if its protein set (column 2) shares any member
    with the NED proteins, otherwise to the ED group if it shares any member
    with the ED proteins; rows matching neither are not counted.

    Two lines are printed:
        1. the number of NED proteins and the number of ED proteins;
        2. ned_core, ed_core, ned_noncore, ed_noncore row counts, where a
           row is "core" when column 13 is '+'.

    `protein_subset` is accepted but currently not used.
    """
    _, ned_rows = utils.load_ned_data('human')
    neds = {row[-2] for row in ned_rows if row[-3] == 'NED'}
    eds = {row[-2] for row in ned_rows if row[-3] == 'ED'}

    _, interactions = load_hein()
    # All proteins appearing in column 2 of any row.  Not used below: the
    # step that restricted neds/eds to this common set is disabled.
    baits = {member for row in interactions for member in row[2]}

    print(len(neds), len(eds))

    # Keyed by (group, is_core).
    tally = {
        ('NED', True): 0,
        ('ED', True): 0,
        ('NED', False): 0,
        ('ED', False): 0,
    }
    for row in interactions:
        # Ignore self-interactors (flagged with '+' in column 14 or 15).
        if row[14] == '+' or row[15] == '+':
            continue
        # NED takes precedence when a row matches both groups.
        if not row[2].isdisjoint(neds):
            group = 'NED'
        elif not row[2].isdisjoint(eds):
            group = 'ED'
        else:
            continue
        tally[group, row[13] == '+'] += 1

    print(tally['NED', True], tally['ED', True],
          tally['NED', False], tally['ED', False])
示例#3
0
def load_paxdb_data(species):
    """Collect the inputs needed to tabulate PaxDb abundances for a species.

    Args:
        species: 'mouse' or 'human'.

    Returns:
        A tuple ``(header, data, abunds, pmap)``:
            header: tab-separated column line made of 'prot', the remaining
                metadata keys in sorted order, 'tcount' and 'def' (no
                trailing newline).
            data: the rows returned by ``utils.load_ned_data(species)``.
            abunds: one ``paxdb.Abundances`` per remaining metadata key, in
                the same sorted order as the header columns.
            pmap: the result of ``protein_map(species)``.

    Raises:
        ValueError: if `species` is not supported.  Previously this case
            only failed later with an UnboundLocalError on `meta`.
        KeyError: if the metadata has no 'CELL_LINE' entry to drop.
    """
    # Identifiers passed to paxdb.get_metadata (NCBI taxonomy IDs for mouse
    # and human).
    taxon_ids = {'mouse': '10090', 'human': '9606'}
    if species not in taxon_ids:
        raise ValueError(
            'unsupported species {0!r}; expected one of {1}'.format(
                species, sorted(taxon_ids)))

    meta = paxdb.get_metadata(taxon_ids[species])
    data = utils.load_ned_data(species)[1]
    pmap = protein_map(species)

    # Datasets excluded from the table.
    # NOTE(review): this pops from the object returned by get_metadata; if
    # that object is shared or cached by paxdb the removal is visible to
    # other users - confirm.
    for key in ('CELL_LINE',):
        meta.pop(key)

    # Sort once so the header columns and the Abundances list stay aligned.
    datasets = sorted(meta)
    # NOTE(review): meaning of the second Abundances argument (0) is not
    # visible here.
    abunds = [paxdb.Abundances(meta[name]['filename'], 0)
              for name in datasets]
    header = '\t'.join(['prot', '\t'.join(datasets), 'tcount', 'def'])
    return header, data, abunds, pmap
示例#4
0
 def __init__(self, species, homologs=False):
     """Load the decay, CORUM and coexpression resources for `species`.

     Mouse specific if using homologs, else human or mouse.

     Args:
         species: species name handed to ``utils.load_ned_data``,
             ``coexpressdb.Coexpression`` and, without homologs, to
             ``dbloader.LoadCorum``.
         homologs: when true, load the homolog mapping from
             ``utils.get_homologs()`` and the 'core' CORUM set without a
             species argument; only valid together with species 'mouse'.

     Raises:
         ValueError: if `homologs` is requested for a species other than
             'mouse'.
     """
     # Reject an unsupported combination before any data is loaded.
     if homologs and species != 'mouse':
         raise ValueError('homologs only implemented for mouse')

     neds = utils.load_ned_data(species)[1]
     if homologs:
         self._homologs = utils.get_homologs()
         self._corum = dbloader.LoadCorum(version='core')
     else:
         # False marks "no homolog mapping in use".
         self._homologs = False
         self._corum = dbloader.LoadCorum(species, 'core')
     self._coex = coexpressdb.Coexpression(species)
     # Check these indices if using new datasets: column 0 is used as the
     # key and column -2 as the stored decay value.
     self._decay = {line[0]: line[-2] for line in neds}
     self._species = species
     self._outdata = []
示例#5
0
def load_proteomicsdb_data():
    """Write per-tissue ProteomicsDB expression for the human proteins.

    For every row of ``utils.load_ned_data('human')`` the protein in column
    0 is looked up first in the 'trembl_tissues.txt' abundances and, failing
    that, in the isoform abundances; proteins found in neither are skipped.
    The output file 'data/human_proteomicsdb_tissue_expression.txt' is
    tab-separated with the columns: 'prot', one column per tissue, 'tcount'
    (number of tissues whose value is not 'NA') and 'def' (last column of
    the input row).

    Returns:
        None; the result is the written file.
    """
    data = utils.load_ned_data('human')[1]
    isoab = proteomicsdb.Abundances('human_protdb_isoforms_expression.txt')
    abunds = proteomicsdb.Abundances('trembl_tissues.txt')
    # One tissue list drives both the header and every row so the columns
    # always line up.  The original rows iterated over `ttargets`, a name
    # that is never defined in this function.
    # NOTE(review): the tissue list comes from `abunds` even for proteins
    # read from `isoab` - confirm both files cover the same tissues.
    tissues = list(abunds.tissues)
    header = '{0}\t{1}\t{2}\t{3}\n'.format('prot', '\t'.join(tissues),
                                           'tcount', 'def')
    with open('data/human_proteomicsdb_tissue_expression.txt', 'w') as outfile:
        outfile.write(header)
        for line in data:
            prot = line[0]
            # Prefer the main table; fall back to the isoform table.
            if prot in abunds.proteins:
                source = abunds
            elif prot in isoab.proteins:
                source = isoab
            else:
                continue
            expression = [source.expression(prot, tis) for tis in tissues]
            tcount = str(len(expression) - expression.count('NA'))
            newline = '\t'.join([prot] + expression + [tcount, line[-1]])
            outfile.write(newline + '\n')
示例#6
0
def test_abundance_dict(species):
    """Check that ``ab.abundance_dict`` yields one entry per data row.

    The 'mouse_homologs' species is skipped without any check.
    """
    if species != 'mouse_homologs':
        _header, rows = utils.load_ned_data(species)
        mapping = ab.abundance_dict(species)
        # Equal lengths also imply that no two rows share a protein ID.
        assert len(rows) == len(mapping)
def test_load_ned_data(species):
    """Check the header and row shape returned by ``utils.load_ned_data``.

    The header must contain the 'proID', 'Rel. abund' and 'def' columns, and
    the first data row must have as many fields as the header.
    """
    columns, rows = utils.load_ned_data(species)
    required = ('proID', 'Rel. abund', 'def')
    missing = [name for name in required if name not in columns]
    assert not missing
    first_row = rows[0]
    assert len(first_row) == len(columns)