示例#1
0
文件: intera.py 项目: rfour92/pypath
    def __init__(
        self,
        protein,
        id_type='uniprot',
        ncbi_tax_id=9606,
        typ='unknown',
        motif=None,
        residue=None,
        isoform=1,
        evidences=None,
        seq=None,
    ):
        """
        Set up a modification record on a protein.

        :param protein: Either an object exposing an ``identifier``
            attribute (stored as it is), or a plain identifier which is
            wrapped into an ``entity.Entity`` using ``id_type`` and
            ``ncbi_tax_id``.
        :param str id_type: Identifier type of ``protein``; also stored
            on the instance.
        :param int ncbi_tax_id: Taxon passed to ``entity.Entity`` when
            ``protein`` has to be wrapped.
        :param str typ: Type label of the modification; stored
            lower-cased.
        :param motif: Stored unchanged in ``self.motif``.
        :param residue: Stored unchanged in ``self.residue``.
        :param isoform: An integer, or a string from which every
            character except digits, ``.`` and ``-`` is removed before
            conversion with ``int``.
        :param evidences: Handed over to ``self.add_evidences``.
        :param seq: Stored unchanged in ``self.seq``.
        """

        # substituting this pattern with '' keeps only digits, dots and
        # dashes; kept on the instance as in the original implementation
        self.non_digit = re.compile(r'[^\d.-]+')

        if hasattr(protein, 'identifier'):
            self.protein = protein
        else:
            self.protein = entity.Entity(
                identifier=protein,
                id_type=id_type,
                taxon=ncbi_tax_id,
            )

        self.id_type = id_type
        self.typ = typ.lower()
        self.seq = seq
        self.motif = motif
        self.residue = residue

        # ``isinstance`` instead of an exact type comparison, so that
        # integer subclasses are accepted as well
        if isinstance(isoform, int):
            self.isoform = isoform
        else:
            self.isoform = int(self.non_digit.sub('', isoform))

        self.isoforms = set()
        # NOTE(review): the raw ``isoform`` argument (not the normalized
        # ``self.isoform``) is registered here, as before; confirm that
        # ``add_isoform`` copes with string isoform labels
        self.add_isoform(isoform)

        self.evidences = evidence.Evidences()
        self.add_evidences(evidences)
示例#2
0
    def __init__(self, domain, ptm, evidences = None, pdbs = None):
        """
        Pair a domain with a modification and attach supporting data.

        :param domain: Stored unchanged in ``self.domain``.
        :param ptm: Stored unchanged in ``self.ptm``.
        :param evidences: Used as ``self.evidences`` if it is truthy;
            otherwise a new ``evidence.Evidences`` object is created.
        :param pdbs: Handed over to ``self.add_pdbs``.
        """

        self.domain = domain
        self.ptm = ptm
        self.pnetw_score = None

        # the container must exist before ``add_pdbs`` is called
        self.pdbs = set()
        self.add_pdbs(pdbs)

        if evidences:
            self.evidences = evidences
        else:
            self.evidences = evidence.Evidences()
示例#3
0
    def __init__(
            self,
            protein,
            start,
            end,
            id_type = 'uniprot',
            ncbi_tax_id = 9606,
            regex = None,
            instance = None,
            isoform = 1,
            motif_name = None,
            prob = None,
            elm = None,
            description = None,
            seq = None,
            evidences = None,
        ):
        """
        Set up a sequence motif located on a protein.

        :param protein: Either an object exposing an ``identifier``
            attribute (stored as it is), or a plain identifier which is
            wrapped into an ``entity.Entity`` using ``id_type`` and
            ``ncbi_tax_id``.
        :param start: First position; string values are reduced to
            digits, ``.`` and ``-`` and converted with ``int``, any other
            value is stored unchanged.
        :param end: Last position; treated the same way as ``start``.
        :param regex: Pattern string compiled with ``re.compile``, or
            ``None``.
        :param isoform: An integer, or a string converted the same way
            as string positions.
        :param evidences: Handed over to ``self.add_evidences``.

        The remaining arguments (``instance``, ``motif_name``, ``prob``,
        ``elm``, ``description``, ``seq``) are stored unchanged under
        attributes of the same name.
        """

        strip_pattern = re.compile(r'[^\d.-]+')

        def _digits_to_int(text):
            # drop everything except digits, dots and dashes
            return int(strip_pattern.sub('', text))

        def _position(value):
            # only strings are converted, anything else passes through
            if isinstance(value, common.basestring):
                return _digits_to_int(value)
            return value

        if hasattr(protein, 'identifier'):
            self.protein = protein
        else:
            self.protein = entity.Entity(
                protein,
                id_type = id_type,
                taxon = ncbi_tax_id,
            )

        self.id_type = id_type
        self.seq = seq

        if not isinstance(isoform, int):
            isoform = _digits_to_int(isoform)
        self.isoform = isoform

        self.start = _position(start)
        self.end = _position(end)

        if regex is not None:
            self.regex = re.compile(regex)
        else:
            self.regex = None

        self.instance = instance
        self.motif_name = motif_name
        self.prob = prob
        self.elm = elm
        self.description = description

        self.evidences = evidence.Evidences()
        self.add_evidences(evidences)
示例#4
0
    def _process(self, p):
        """
        Convert one raw enzyme-substrate record into ``DomainMotif``
        objects.

        This is a generator. For every combination of a mapped enzyme
        and a matching substrate (UniProt ID, isoform) pair it yields one
        ``intera.DomainMotif``.

        :param dict p: The record. Keys read unconditionally:
            ``kinase``, ``substrate``, ``resaa``, ``resnum``. Optional
            keys: ``substrate_isoform``, ``instance``, ``start``,
            ``end``, ``typ``, ``databases``, ``references``,
            ``substrate_refseq``, and any key listed in
            ``self.input_param.extra_attrs``. If ``instance`` is given,
            ``start`` and ``end`` are required for sequence matching.

        The record is modified in place in two ways only: a scalar
        ``kinase`` is wrapped into a list, and ``typ`` defaults to
        ``'phosphorylation'``. The motif region looked up for a
        substrate is kept in local variables, so the region found for
        one substrate isoform is never reused for another one.

        Records without enzyme, and records whose substrate is a string
        starting with ``HLA`` are skipped without yielding anything.
        """

        # human leukocyte antigens result in an extremely high number
        # of combinations, hence these records are skipped
        if (
            not p['kinase'] or (
                isinstance(p['substrate'], common.basestring) and
                p['substrate'].startswith('HLA')
            )
        ):

            return

        if not isinstance(p['kinase'], list):

            p['kinase'] = [p['kinase']]

        kinase_ups = mapping.map_names(
            p['kinase'],
            self.id_type_enzyme,
            'uniprot',
            ncbi_tax_id=self.ncbi_tax_id,
        )

        # identifiers of all candidate substrates
        substrate_ups_all = set()

        for sub_id_type in self.id_type_substrate:

            # an ID type may come together with the name of the key
            # in the record which holds the identifier of that type
            if isinstance(sub_id_type, (list, tuple)):
                sub_id_type, sub_id_attr = sub_id_type
            else:
                sub_id_attr = 'substrate'

            substrate_ups_all.update(
                mapping.map_name(
                    p[sub_id_attr],
                    sub_id_type,
                    'uniprot',
                    self.ncbi_tax_id,
                )
            )

        # values as provided in the record; these are not overwritten
        # while iterating the substrates
        given_isoform = p.get('substrate_isoform')
        given_instance = p.get('instance')
        given_start = p.get('start')
        given_end = p.get('end')

        # looking up sequences in all isoforms:
        # (identifier, isoform) pairs matching the record
        substrate_ups = []

        for s in substrate_ups_all:

            if given_isoform:

                substrate_ups.append((s, given_isoform))
                continue

            se = self.get_seq(s)

            if se is None:
                continue

            for isof in se.isoforms():

                if given_instance is not None:

                    matched = se.match(
                        p['instance'],
                        p['start'],
                        p['end'],
                        isoform=isof,
                    )

                else:

                    matched = se.match(
                        p['resaa'],
                        p['resnum'],
                        isoform=isof,
                    )

                if matched:

                    substrate_ups.append((s, isof))

        if self.trace:

            if p['substrate'] not in self.sub_ambig:

                self.sub_ambig[p['substrate']] = substrate_ups

            for k in p['kinase']:

                if k not in self.kin_ambig:

                    self.kin_ambig[k] = kinase_ups

            # generating report on non matching substrates
            if not substrate_ups:

                # the elements here are plain identifiers, not
                # (identifier, isoform) pairs, hence no indexing
                for s in substrate_ups_all:

                    se = self.get_seq(s)

                    if se is None:
                        continue

                    # the sequence fragment can be extracted only
                    # if the record provides both coordinates
                    fragment = (
                        se.get(given_start, given_end)
                        if given_start is not None and given_end is not None
                        else None
                    )

                    # NOTE(review): the second slot presumably is the
                    # isoform; no isoform matched here, hence ``None``.
                    # Confirm against the consumers of ``self.nomatch``.
                    self.nomatch.append((
                        s,
                        None,
                        (
                            p.get('substrate_refseq', ''),
                            s,
                            given_instance,
                            fragment,
                        ),
                    ))

        # building objects representing the enzyme-substrate interaction(s)

        p.setdefault('typ', 'phosphorylation')

        has_via = hasattr(self.input_param, 'get_via')

        _resources = tuple(
            self.input_param.get_via(name) if has_via else name
            for name in p.get('databases', ())
        )
        _resources += (
            (self.name, )
            if isinstance(self.input_param, common.basestring) else
            (self.input_param, )
        )

        # collecting the evidences
        references = p.get('references')

        evidences = evidence.Evidences(
            evidence.Evidence(resource=_res, references=references)
            for _res in _resources
        )

        for s in substrate_ups:

            # building the objects representing the substrate
            se = self.get_seq(s[0])

            if se is None:
                continue

            res = intera.Residue(
                p['resnum'],
                p['resaa'],
                s[0],
                isoform=s[1],
                ncbi_tax_id=self.ncbi_tax_id,
            )

            # starting from the values of the record for each substrate
            start, end, instance = given_start, given_end, given_instance

            if instance is None:

                reg = se.get_region(
                    p['resnum'],
                    start,
                    end,
                    isoform=s[1],
                )

                if reg is not None:

                    start, end, instance = reg

            mot = intera.Motif(
                s[0],
                start,
                end,
                instance=instance,
                isoform=s[1],
                ncbi_tax_id=self.ncbi_tax_id,
            )

            ptm = intera.Ptm(
                s[0],
                motif=mot,
                residue=res,
                typ=p['typ'],
                evidences=evidences,
                isoform=s[1],
                ncbi_tax_id=self.ncbi_tax_id,
            )

            for k in kinase_ups:

                # unless mixed organisms are allowed, both partners
                # must belong to the organism of this processor
                if (
                    not self.allow_mixed_organisms and (
                        self.get_taxon(k) != self.ncbi_tax_id or
                        self.get_taxon(s[0]) != self.ncbi_tax_id
                    )
                ):
                    continue

                # the enzyme (kinase)
                dom = intera.Domain(
                    protein=k,
                    ncbi_tax_id=self.ncbi_tax_id,
                )

                dommot = intera.DomainMotif(
                    domain=dom,
                    ptm=ptm,
                    evidences=evidences,
                )

                if hasattr(self.input_param, 'extra_attrs'):

                    # copying additional fields of the record
                    # to attributes of the resulting object
                    for attr, key in iteritems(self.input_param.extra_attrs):

                        if key in p:

                            setattr(dommot, attr, p[key])

                yield dommot