def abundance_dict(species):
    """Map each protein ID to its relative abundance and 'def' value.

    Loads the NED table for ``species`` through ``utils.load_ned_data`` and
    returns ``{proID: (float(rel.abun), def)}``. Note the tuple order is
    (abundance, def); the abundance is converted to ``float`` while the
    'def' value is passed through unchanged.
    """
    columns, rows = utils.load_ned_data(species)
    # NOTE(review): these column names must match the header produced by
    # utils.load_ned_data -- confirm 'rel.abun' is the current spelling.
    id_col = columns.index('proID')
    abundance_col = columns.index('rel.abun')
    def_col = columns.index('def')

    mapping = {}
    for row in rows:
        mapping[row[id_col]] = (float(row[abundance_col]), row[def_col])
    return mapping
def NED_core_interactor_test(protein_subset=None):
    """Print core / non-core interaction counts for NED and ED proteins.

    Human NED rows are split into two ID sets according to the value in the
    third-from-last column ('NED' or 'ED'); the ID itself is taken from the
    second-from-last column. Every row returned by ``load_hein`` is then
    classified by whether its protein set (column 2) overlaps the NED set
    or, failing that, the ED set, and by whether column 13 is '+'.

    Two lines are printed: the sizes of the NED and ED sets, then the four
    counts in the order ned_core, ed_core, ned_noncore, ed_noncore.

    ``protein_subset`` is accepted but currently unused; the filtering that
    would consume it is disabled.
    """
    _, ned_rows = utils.load_ned_data('human')
    neds = {row[-2] for row in ned_rows if row[-3] == 'NED'}
    eds = {row[-2] for row in ned_rows if row[-3] == 'ED'}

    _, interactions = load_hein()
    # Only needed by the (currently disabled) step that would restrict the
    # NED/ED sets to proteins present in both datasets.
    baits = {protein for row in interactions for protein in row[2]}

    print(len(neds), len(eds))

    # Keyed by (group, is_core); is_core is True when column 13 is '+'.
    tally = {
        ('ned', True): 0,
        ('ed', True): 0,
        ('ned', False): 0,
        ('ed', False): 0,
    }
    for row in interactions:
        # Ignore self-interactors
        if row[14] == '+' or row[15] == '+':
            continue
        members = row[2]
        if members.intersection(neds):
            group = 'ned'
        elif members.intersection(eds):
            group = 'ed'
        else:
            continue
        tally[group, row[13] == '+'] += 1

    print(tally['ned', True], tally['ed', True],
          tally['ned', False], tally['ed', False])
def load_paxdb_data(species):
    """Load NED rows together with the PaxDb abundance tables for a species.

    Args:
        species: 'mouse' or 'human'.

    Returns:
        A 4-tuple ``(header, data, abunds, pmap)`` where ``header`` is a
        tab-separated string ('prot', the sorted dataset names, 'tcount',
        'def'), ``data`` is the second item returned by
        ``utils.load_ned_data(species)``, ``abunds`` is a list of
        ``paxdb.Abundances`` objects in the same sorted order as the header
        columns, and ``pmap`` is ``protein_map(species)``.

    Raises:
        ValueError: if ``species`` is not one of the supported names.
    """
    # Identifiers passed to paxdb.get_metadata for each supported species.
    taxon_ids = {'mouse': '10090', 'human': '9606'}
    if species not in taxon_ids:
        # Previously an unsupported species fell through both branches and
        # failed later with an unrelated-looking unbound-variable error.
        raise ValueError(
            'unsupported species {!r}; expected one of {}'.format(
                species, sorted(taxon_ids)))

    meta = paxdb.get_metadata(taxon_ids[species])
    data = utils.load_ned_data(species)[1]
    pmap = protein_map(species)

    # Datasets excluded from the output. They are filtered out rather than
    # popped so the mapping returned by paxdb is left untouched.
    skip = {'CELL_LINE'}
    tissues = sorted(t for t in meta if t not in skip)

    abunds = [paxdb.Abundances(meta[t]['filename'], 0) for t in tissues]
    header = '\t'.join(['prot', '\t'.join(tissues), 'tcount', 'def'])
    return header, data, abunds, pmap
def __init__(self, species, homologs=False):
    """Mouse specific if using homologs, else human or mouse.

    Args:
        species: species name passed to ``utils.load_ned_data`` and
            ``coexpressdb.Coexpression`` (and to ``dbloader.LoadCorum``
            when homologs are not used).
        homologs: when truthy, load the homolog mapping from
            ``utils.get_homologs`` and the species-independent 'core'
            CORUM set; only supported for ``species == 'mouse'``.

    Raises:
        ValueError: if ``homologs`` is requested for a species other than
            'mouse'.
    """
    # Validate first so an unsupported combination fails before any data
    # is loaded.
    if homologs and species != 'mouse':
        raise ValueError('homologs only implemented for mouse')

    neds = utils.load_ned_data(species)[1]
    if homologs:
        self._homologs = utils.get_homologs()
        self._corum = dbloader.LoadCorum(version='core')
    else:
        self._homologs = False
        self._corum = dbloader.LoadCorum(species, 'core')
    self._coex = coexpressdb.Coexpression(species)
    # Check these indices if using new datasets
    # (first column is used as the key, second-from-last as the value).
    self._decay = {line[0]: line[-2] for line in neds}
    self._species = species
    self._outdata = []
def load_proteomicsdb_data():
    """Write per-tissue ProteomicsDB expression for the human NED proteins.

    For every row of the human NED data, the protein in the first column is
    looked up in the 'trembl_tissues.txt' table and, if absent there, in the
    isoform table. Proteins found in neither are skipped. Each output row
    holds the protein, one expression value per tissue, 'tcount' (the number
    of values that are not 'NA') and the last column of the NED row.

    The result is written, tab-separated with a header line, to
    'data/human_proteomicsdb_tissue_expression.txt'. Returns None.
    """
    data = utils.load_ned_data('human')[1]
    isoab = proteomicsdb.Abundances('human_protdb_isoforms_expression.txt')
    abunds = proteomicsdb.Abundances('trembl_tissues.txt')
    # One tissue list drives both the header and every row, so the columns
    # always line up. (The rows previously iterated over a name that is not
    # defined in this function.)
    tissues = list(abunds.tissues)
    header = '{0}\t{1}\t{2}\t{3}\n'.format('prot', '\t'.join(tissues),
                                           'tcount', 'def')
    with open('data/human_proteomicsdb_tissue_expression.txt', 'w') as outfile:
        outfile.write(header)
        for line in data:
            prot = line[0]
            # Prefer the main table; fall back to the isoform table.
            if prot in abunds.proteins:
                source = abunds
            elif prot in isoab.proteins:
                source = isoab
            else:
                continue
            expression = [source.expression(prot, tis) for tis in tissues]
            tcount = str(len(expression) - expression.count('NA'))
            newline = '\t'.join([prot] + expression + [tcount, line[-1]])
            outfile.write(newline + '\n')
def test_abundance_dict(species):
    """Check abundance_dict yields one entry per NED data row.

    The 'mouse_homologs' dataset is skipped.
    """
    if species != 'mouse_homologs':
        _header, rows = utils.load_ned_data(species)
        by_protein = ab.abundance_dict(species)
        # Equal sizes means no row was dropped or merged into another key.
        assert len(rows) == len(by_protein)
def test_load_ned_data(species):
    """Check the NED header has the expected columns and matches row width."""
    header, data = utils.load_ned_data(species)
    required_columns = ('proID', 'Rel. abund', 'def')
    for column in required_columns:
        assert column in header
    # Only the first data row is compared against the header width.
    first_row = data[0]
    assert len(first_row) == len(header)