def __init__(
        self,
        protein,
        id_type='uniprot',
        ncbi_tax_id=9606,
        typ='unknown',
        motif=None,
        residue=None,
        isoform=1,
        evidences=None,
        seq=None,
):
    """
    Set up a modification record on a protein.

    :param protein: An object exposing an ``identifier`` attribute (used
        as it is), or a raw identifier which is wrapped into
        ``entity.Entity`` together with ``id_type`` and ``ncbi_tax_id``.
    :param id_type: Identifier type of ``protein``; also stored on the
        instance.
    :param ncbi_tax_id: Passed as ``taxon`` when an ``entity.Entity`` is
        created.
    :param typ: Modification type label; stored lower-cased.
    :param motif: Stored unchanged.
    :param residue: Stored unchanged.
    :param isoform: An ``int``, or a string from which everything except
        digits, dots and minus signs is removed before ``int`` conversion.
    :param evidences: Forwarded to ``self.add_evidences``.
    :param seq: Stored unchanged.
    """

    # removes every character that is not a digit, a dot or a minus sign
    self.non_digit = re.compile(r'[^\d.-]+')

    self.protein = (
        protein
        if hasattr(protein, 'identifier') else
        entity.Entity(
            identifier=protein,
            id_type=id_type,
            taxon=ncbi_tax_id,
        )
    )
    self.id_type = id_type
    self.typ = typ.lower()
    self.seq = seq
    self.motif = motif
    self.residue = residue

    # ``isinstance`` rather than an exact type comparison, so that ``int``
    # subclasses are accepted and never passed to the regex substitution
    self.isoform = (
        isoform
        if isinstance(isoform, int) else
        int(self.non_digit.sub('', isoform))
    )

    self.isoforms = set()
    # the value is handed over as received, normalising it is left to
    # ``add_isoform``
    self.add_isoform(isoform)

    self.evidences = evidence.Evidences()
    self.add_evidences(evidences)
def __init__(self, domain, ptm, evidences = None, pdbs = None):
    """
    Pair a domain with a modification.

    :param domain: Stored as ``self.domain``.
    :param ptm: Stored as ``self.ptm``.
    :param evidences: Kept by reference when truthy; otherwise a new,
        empty ``evidence.Evidences`` takes its place.
    :param pdbs: Forwarded to ``self.add_pdbs``.
    """

    self.ptm = ptm
    self.domain = domain

    # the container and the score exist before ``add_pdbs`` is called
    self.pdbs = set()
    self.pnetw_score = None
    self.add_pdbs(pdbs)

    if not evidences:
        evidences = evidence.Evidences()

    self.evidences = evidences
def __init__(
        self,
        protein,
        start,
        end,
        id_type = 'uniprot',
        ncbi_tax_id = 9606,
        regex = None,
        instance = None,
        isoform = 1,
        motif_name = None,
        prob = None,
        elm = None,
        description = None,
        seq = None,
        evidences = None,
):
    """
    Set up a sequence motif located on a protein.

    :param protein: An object exposing an ``identifier`` attribute (used
        as it is), or a raw identifier which is wrapped into
        ``entity.Entity`` together with ``id_type`` and ``ncbi_tax_id``.
    :param start: First position; strings are reduced to their numeric
        characters and converted to ``int``, other values are kept.
    :param end: Last position; handled the same way as ``start``.
    :param regex: Pattern source compiled with ``re.compile``, or ``None``.
    :param isoform: An ``int``, or a string converted like ``start``.
    :param evidences: Forwarded to ``self.add_evidences``.

    The remaining arguments (``instance``, ``motif_name``, ``prob``,
    ``elm``, ``description``, ``seq``) are stored unchanged.
    """

    # removes every character that is not a digit, a dot or a minus sign
    strip_non_numeric = re.compile(r'[^\d.-]+')

    if hasattr(protein, 'identifier'):
        self.protein = protein
    else:
        self.protein = entity.Entity(
            protein,
            id_type = id_type,
            taxon = ncbi_tax_id,
        )

    self.id_type = id_type
    self.seq = seq

    if not isinstance(isoform, int):
        isoform = int(strip_non_numeric.sub('', isoform))
    self.isoform = isoform

    if isinstance(start, common.basestring):
        start = int(strip_non_numeric.sub('', start))
    self.start = start

    if isinstance(end, common.basestring):
        end = int(strip_non_numeric.sub('', end))
    self.end = end

    self.regex = re.compile(regex) if regex is not None else None
    self.instance = instance
    self.motif_name = motif_name
    self.prob = prob
    self.elm = elm
    self.description = description

    self.evidences = evidence.Evidences()
    self.add_evidences(evidences)
def _process(self, p): # human leukocyte antigenes result a result an # extremely high number of combinations if (not p['kinase'] or (isinstance(p['substrate'], common.basestring) and p['substrate'].startswith('HLA'))): return if not isinstance(p['kinase'], list): p['kinase'] = [p['kinase']] kinase_ups = mapping.map_names( p['kinase'], self.id_type_enzyme, 'uniprot', ncbi_tax_id=self.ncbi_tax_id, ) substrate_ups_all = set() for sub_id_type in self.id_type_substrate: if isinstance(sub_id_type, (list, tuple)): sub_id_type, sub_id_attr = sub_id_type else: sub_id_attr = 'substrate' substrate_ups_all.update( set( mapping.map_name( p[sub_id_attr], sub_id_type, 'uniprot', self.ncbi_tax_id, ))) # looking up sequences in all isoforms: substrate_ups = [] for s in substrate_ups_all: if 'substrate_isoform' in p and p['substrate_isoform']: substrate_ups.append((s, p['substrate_isoform'])) else: se = self.get_seq(s) if se is None: continue for isof in se.isoforms(): if 'instance' in p and p['instance'] is not None: if se.match( p['instance'], p['start'], p['end'], isoform=isof, ): substrate_ups.append((s, isof)) else: if se.match( p['resaa'], p['resnum'], isoform=isof, ): substrate_ups.append((s, isof)) if self.trace: if p['substrate'] not in self.sub_ambig: self.sub_ambig[p['substrate']] = substrate_ups for k in p['kinase']: if k not in self.kin_ambig: self.kin_ambig[k] = kinase_ups # generating report on non matching substrates if len(substrate_ups) == 0: for s in substrate_ups_all: se = self.get_seq(s[0]) if se is None: continue self.nomatch.append(( s[0], s[1], ( p['substrate_refseq'] if 'substrate_refseq' in p else '', s, p['instance'], se.get(p['start'], p['end']), ), )) # building objects representing the enzyme-substrate interaction(s) if 'typ' not in p: p['typ'] = 'phosphorylation' _resources = tuple( (self.input_param. 
get_via(name) if hasattr(self.input_param, 'get_via') else name) for name in (p['databases'] if 'databases' in p else ())) _resources += ((self.name, ) if isinstance( self.input_param, common.basestring) else (self.input_param, )) # collecting the evidences evidences = evidence.Evidences( evidence.Evidence(resource=_res, references=p['references'] if 'references' in p else None) for _res in _resources) for s in substrate_ups: # building the objects representing the substrate se = self.get_seq(s[0]) if se is None: continue res = intera.Residue( p['resnum'], p['resaa'], s[0], isoform=s[1], ncbi_tax_id=self.ncbi_tax_id, ) if 'instance' not in p or p['instance'] is None: reg = se.get_region( p['resnum'], p['start'] if 'start' in p else None, p['end'] if 'end' in p else None, isoform=s[1], ) if reg is not None: p['start'], p['end'], p['instance'] = reg mot = intera.Motif( s[0], p['start'], p['end'], instance=p['instance'], isoform=s[1], ncbi_tax_id=self.ncbi_tax_id, ) ptm = intera.Ptm( s[0], motif=mot, residue=res, typ=p['typ'], evidences=evidences, isoform=s[1], ncbi_tax_id=self.ncbi_tax_id, ) for k in kinase_ups: if (not self.allow_mixed_organisms and (self.get_taxon(k) != self.ncbi_tax_id or self.get_taxon(s[0]) != self.ncbi_tax_id)): continue # the enzyme (kinase) dom = intera.Domain( protein=k, ncbi_tax_id=self.ncbi_tax_id, ) dommot = intera.DomainMotif( domain=dom, ptm=ptm, evidences=evidences, ) if hasattr(self.input_param, 'extra_attrs'): for attr, key in iteritems(self.input_param.extra_attrs): if key in p: setattr(dommot, attr, p[key]) yield dommot