def loader(ncbi_tax_id=9606):
    """
    Retrieve Pfam region data for the UniProt entries of one organism.

    The output of ``uniprot_input.all_uniprots(organism=ncbi_tax_id)`` is
    handed to ``dataio.get_pfam_regions`` as ``uniprots``, together with
    ``dicts='uniprot'`` and ``keepfile=True``. Whatever that call returns
    is passed through unchanged.

    :param int ncbi_tax_id:
        NCBI Taxonomy ID of the organism (default: 9606).
    """
    return dataio.get_pfam_regions(
        uniprots=uniprot_input.all_uniprots(organism=ncbi_tax_id),
        dicts='uniprot',
        keepfile=True,
    )
def get_uniprot_sec(organism=9606):
    """
    Downloads and processes the mapping between secondary and primary
    UniProt IDs.

    Yields pairs of secondary and primary UniProt IDs, each pair being a
    two element list of strings as produced by splitting one line of the
    downloaded file on whitespace. Lines which do not split into exactly
    two fields are ignored.

    :param int organism:
        NCBI Taxonomy ID of the organism. Only pairs whose second element
        is among the IDs returned by ``uniprot_input.all_uniprots`` for
        this organism are yielded. If ``None``, no such filtering is done.
    """

    # ``None`` means "do not filter by proteome"
    proteome = (
        None
        if organism is None else
        set(uniprot_input.all_uniprots(organism=organism))
    )

    url = urls.urls['uniprot_sec']['url']
    c = curl.Curl(url, silent=False, large=True)

    for lineno, raw in enumerate(c.result):

        # the first 30 lines are never processed
        # (presumably the file's header -- TODO confirm)
        if lineno < 30:
            continue

        pair = raw.decode('utf-8').split()

        if len(pair) != 2:
            continue

        if proteome is None or pair[1] in proteome:
            yield pair
def read_mapping_uniprot_list(self, param, uniprots=None, ncbi_tax_id=None):
    """
    Build ID translation dictionaries from tab separated mapping data
    obtained through ``self._read_mapping_uniprot_list``.

    The result is stored in ``self.mapping["to"]`` (second column of the
    data -> list of first column values) and, if ``param.bi`` is true, in
    ``self.mapping["from"]`` (first column -> list of second column
    values). ``self.cleanDict`` is called on each stored dictionary.
    Nothing is returned.

    :param param:
        Settings object; the attributes ``ncbi_tax_id``, ``swissprot``,
        ``targetNameType``, ``target_ac_name``, ``ac_name`` and ``bi``
        are read.
    :param uniprots:
        Identifiers to query. If ``None``, they are taken from
        ``uniprot_input.all_uniprots`` for the organism.
    :param ncbi_tax_id:
        Organism identifier; defaults to ``param.ncbi_tax_id``.
    """

    if ncbi_tax_id is None:
        ncbi_tax_id = param.ncbi_tax_id

    if uniprots is None:
        uniprots = uniprot_input.all_uniprots(
            ncbi_tax_id,
            swissprot=param.swissprot,
        )

    if param.targetNameType != 'uniprot':
        # first translate the queried IDs ('ACC') to the target ID type,
        # then use the translated IDs in the main query below
        utarget = self._read_mapping_uniprot_list(
            'ACC',
            param.target_ac_name,
            uniprots,
        )
        # the first line is discarded (presumably a header)
        _ = utarget.readline()
        ac_list = []

        for raw in utarget:
            fields = raw.decode('ascii').split('\t')

            # blank or single column lines have no translated ID;
            # indexing them would raise IndexError
            if len(fields) > 1:
                ac_list.append(fields[1].strip())

    else:
        ac_list = uniprots

    udata = self._read_mapping_uniprot_list(
        param.target_ac_name,
        param.ac_name,
        ac_list,
    )
    # the first line is discarded (presumably a header)
    _ = udata.readline()

    mapping_o = {}
    mapping_i = {}

    for raw in udata:
        fields = raw.decode('ascii').strip().split('\t')

        # skip blank lines and lines whose second column is empty
        # instead of failing with IndexError
        if len(fields) < 2:
            continue

        mapping_o.setdefault(fields[1], []).append(fields[0])

        if param.bi:
            mapping_i.setdefault(fields[0], []).append(fields[1])

    self.mapping["to"] = mapping_o
    self.cleanDict(self.mapping["to"])

    if param.bi:
        self.mapping["from"] = mapping_i
        self.cleanDict(self.mapping["from"])
def get_uniprot_sec(organism=9606):
    """
    Downloads the mapping between secondary and primary UniProt IDs and
    returns a lazy iterator over it.

    Each item is a two element list of strings obtained by splitting one
    line of the downloaded file on whitespace (presumably secondary ID
    first, primary ID second -- TODO confirm against the file format).
    Lines which do not split into exactly two fields are ignored.

    :param int organism:
        NCBI Taxonomy ID of the organism. Only items whose second element
        is among the IDs returned by ``uniprot_input.all_uniprots`` for
        this organism are kept. If ``None``, no such filtering is done.

    :return:
        An iterator; the downloaded data is consumed only as the
        iterator is advanced.
    """

    # ``None`` means "do not filter by proteome"
    proteome = (
        None
        if organism is None else
        set(uniprot_input.all_uniprots(organism=organism))
    )

    url = urls.urls['uniprot_sec']['url']
    c = curl.Curl(url, silent=False, large=True)
    data = c.result

    # the first 30 lines are never processed
    # (presumably the file's header -- TODO confirm)
    candidates = (
        raw.decode('utf-8').split()
        for lineno, raw in enumerate(data)
        if lineno >= 30
    )

    return (
        pair
        for pair in candidates
        if len(pair) == 2 and (proteome is None or pair[1] in proteome)
    )
def load_proteome(self, taxon, swissprot_only=True):
    """
    Make sure the proteome of ``taxon`` is present in the cache.

    Proteomes are stored as sets in ``self._proteomes`` under the key
    ``(taxon, swissprot_only)``. If the key is already there, the call
    does nothing. Otherwise the set is built from
    ``uniprot_input.all_uniprots(taxon, swissprot_only)`` and each of
    its members is registered in ``self._taxonomy`` with the same key.

    After a proteome with ``swissprot_only=False`` has been loaded, the
    ``(taxon, True)`` proteome is requested too; if that one was not
    cached yet, its members get re-registered under ``(taxon, True)``.

    :param taxon:
        Organism identifier, passed on to ``all_uniprots``.
    :param bool swissprot_only:
        Second positional argument for ``all_uniprots``.
    """
    cache_key = (taxon, swissprot_only)

    # already loaded: nothing to do
    if cache_key in self._proteomes:
        return

    members = set(uniprot_input.all_uniprots(*cache_key))
    self._proteomes[cache_key] = members

    for uniprot_id in members:
        self._taxonomy[uniprot_id] = cache_key

    if not swissprot_only:
        self.load_proteome(taxon, True)