示例#1
0
def map_sequence(stmts_in, **kwargs):
    """Map sequences using the SiteMapper.

    Statements that the SiteMapper returns as valid are kept unchanged.
    Statements that it returns as mapped are kept, in their mapped form,
    only if every entry in their ``mapped_mods`` carries a non-None
    mapping result.

    Parameters
    ----------
    stmts_in : list[indra.statements.Statement]
        A list of statements to map.
    save : Optional[str]
        The name of a pickle file to save the results (stmts_out) into.

    Returns
    -------
    stmts_out : list[indra.statements.Statement]
        A list of mapped statements.
    """
    logger.info('Mapping sites on %d statements...', len(stmts_in))
    sm = SiteMapper(default_site_map)
    valid, mapped = sm.map_sites(stmts_in)
    # Keep a mapped statement only when none of its modification sites was
    # left unmapped (the second element of a mapped_mods entry is None when
    # no mapping was produced for that site).
    correctly_mapped_stmts = [
        ms.mapped_stmt for ms in mapped
        if all(mm[1] is not None for mm in ms.mapped_mods)
    ]
    stmts_out = valid + correctly_mapped_stmts
    logger.info('%d statements with valid sites', len(stmts_out))
    dump_pkl = kwargs.get('save')
    if dump_pkl:
        dump_statements(stmts_out, dump_pkl)
    return stmts_out
示例#2
0
def extract_phos():
    """Extract and filter Phosphorylation statements from the RAS Machine.

    Loads the pickled model from ``stmts_fname`` (a mapping whose values are
    collections of statements) and narrows the statements down step by
    step, logging the remaining count after each step:

    1. keep only Phosphorylation statements,
    2. keep only those with an enzyme (``enz`` is not None),
    3. apply ``filter_grounded`` and ``filter_enzkinase``,
    4. keep the statements the SiteMapper reports as valid,
    5. run the Preassembler (``combine_duplicates``, ``combine_related``),
    6. apply ``filter_direct`` and ``filter_non_hypothesis``.

    Two pickle files are written using relative paths:
    ``mapped_unique_phos.pkl`` holds the result of step 5 and
    ``filtered_phos.pkl`` holds the final result.

    Returns
    -------
    stmts
        The statements remaining after the direct and non-hypothesis
        filters.
    """
    # NOTE: unpickling can execute arbitrary code, so stmts_fname must
    # point to a trusted file.
    with open(stmts_fname, 'rb') as fh:
        model = pickle.load(fh)

    # Only the statement collections are needed, not the keys they are
    # stored under.
    stmts = []
    for pmid_stmts in model.values():
        stmts.extend(s for s in pmid_stmts if isinstance(s, Phosphorylation))
    logger.info('%d phosphorylations in RAS Machine', len(stmts))

    stmts = [s for s in stmts if s.enz is not None]
    logger.info('%d phosphorylations with enzyme in RAS Machine', len(stmts))

    stmts_grounded = filter_grounded(stmts)
    logger.info('%d grounded phosphorylations in RAS Machine',
                len(stmts_grounded))

    stmts_enzkinase = filter_enzkinase(stmts_grounded)
    logger.info('%d phosphorylations with kinase enzyme in RAS Machine',
                len(stmts_enzkinase))

    # Only the statements with valid sites are used; the mapped ones are
    # discarded.
    sm = SiteMapper(default_site_map)
    stmts_valid, _ = sm.map_sites(stmts_enzkinase)
    logger.info('%d valid-sequence phosphorylations in RAS Machine',
                len(stmts_valid))

    pa = Preassembler(hierarchies, stmts_valid)
    stmts_unique = pa.combine_duplicates()
    logger.info('%d unique phosphorylations in RAS Machine',
                len(stmts_unique))

    stmts_unique = pa.combine_related()
    logger.info('%d top-level phosphorylations in RAS Machine',
                len(stmts_unique))

    with open('mapped_unique_phos.pkl', 'wb') as fh:
        pickle.dump(stmts_unique, fh)

    # Filter RAS Machine statements for direct and not hypothesis
    stmts = filter_direct(stmts_unique)
    logger.info('%d direct phosphorylations in RAS Machine', len(stmts))
    stmts = filter_non_hypothesis(stmts)
    logger.info('%d non-hypothesis phosphorylations in RAS Machine',
                len(stmts))

    with open('filtered_phos.pkl', 'wb') as fh:
        pickle.dump(stmts, fh)

    return stmts
示例#3
0
def map_sequence(stmts_in, **kwargs):
    """Map sequences using the SiteMapper.

    Statements that the SiteMapper returns as valid are kept unchanged.
    Statements that it returns as mapped are kept, in their mapped form,
    only if every entry in their ``mapped_mods`` carries a non-None
    mapping result.

    Parameters
    ----------
    stmts_in : list[indra.statements.Statement]
        A list of statements to map.
    do_methionine_offset : boolean
        Whether to check for off-by-one errors in site position (possibly)
        attributable to site numbering from mature proteins after
        cleavage of the initial methionine. If True, checks the reference
        sequence for a known modification at 1 site position greater
        than the given one; if there exists such a site, creates the
        mapping. Default is True.
    do_orthology_mapping : boolean
        Whether to check sequence positions for known modification sites
        in mouse or rat sequences (based on PhosphoSitePlus data). If a
        mouse/rat site is found that is linked to a site in the human
        reference sequence, a mapping is created. Default is True.
    do_isoform_mapping : boolean
        Whether to check sequence positions for known modifications
        in other human isoforms of the protein (based on PhosphoSitePlus
        data). If a site is found that is linked to a site in the human
        reference sequence, a mapping is created. Default is True.
    save : Optional[str]
        The name of a pickle file to save the results (stmts_out) into.

    Returns
    -------
    stmts_out : list[indra.statements.Statement]
        A list of mapped statements.
    """
    logger.info('Mapping sites on %d statements...', len(stmts_in))
    # Only these keyword arguments are forwarded to SiteMapper.map_sites.
    kwarg_list = ['do_methionine_offset', 'do_orthology_mapping',
                  'do_isoform_mapping']
    sm = SiteMapper(default_site_map)
    valid, mapped = sm.map_sites(stmts_in, **_filter(kwargs, kwarg_list))
    # Keep a mapped statement only when none of its modification sites was
    # left unmapped (the second element of a mapped_mods entry is None when
    # no mapping was produced for that site).
    correctly_mapped_stmts = [
        ms.mapped_stmt for ms in mapped
        if all(mm[1] is not None for mm in ms.mapped_mods)
    ]
    stmts_out = valid + correctly_mapped_stmts
    logger.info('%d statements with valid sites', len(stmts_out))
    dump_pkl = kwargs.get('save')
    if dump_pkl:
        dump_statements(stmts_out, dump_pkl)
    return stmts_out
示例#4
0
def extract_phos():
    """Extract and filter Phosphorylation statements from the RAS Machine.

    Loads the pickled model from ``stmts_fname`` (a mapping whose values are
    collections of statements) and narrows the statements down step by
    step, logging the remaining count after each step:

    1. keep only Phosphorylation statements,
    2. keep only those with an enzyme (``enz`` is not None),
    3. apply ``filter_grounded`` and ``filter_enzkinase``,
    4. keep the statements the SiteMapper reports as valid,
    5. run the Preassembler (``combine_duplicates``, ``combine_related``),
    6. apply ``filter_direct`` and ``filter_non_hypothesis``.

    Two pickle files are written using relative paths and pickle
    protocol 2: ``mapped_unique_phos.pkl`` holds the result of step 5 and
    ``filtered_phos.pkl`` holds the final result.

    Returns
    -------
    stmts
        The statements remaining after the direct and non-hypothesis
        filters.
    """
    # NOTE: unpickling can execute arbitrary code, so stmts_fname must
    # point to a trusted file.
    with open(stmts_fname, 'rb') as fh:
        model = pickle.load(fh)

    # Only the statement collections are needed, not the keys they are
    # stored under.
    stmts = []
    for pmid_stmts in model.values():
        stmts.extend(s for s in pmid_stmts if isinstance(s, Phosphorylation))
    logger.info('%d phosphorylations in RAS Machine', len(stmts))

    stmts = [s for s in stmts if s.enz is not None]
    logger.info('%d phosphorylations with enzyme in RAS Machine', len(stmts))

    stmts_grounded = filter_grounded(stmts)
    logger.info('%d grounded phosphorylations in RAS Machine',
                len(stmts_grounded))

    stmts_enzkinase = filter_enzkinase(stmts_grounded)
    logger.info('%d phosphorylations with kinase enzyme in RAS Machine',
                len(stmts_enzkinase))

    # Only the statements with valid sites are used; the mapped ones are
    # discarded.
    sm = SiteMapper(default_site_map)
    stmts_valid, _ = sm.map_sites(stmts_enzkinase)
    logger.info('%d valid-sequence phosphorylations in RAS Machine',
                len(stmts_valid))

    pa = Preassembler(hierarchies, stmts_valid)
    stmts_unique = pa.combine_duplicates()
    logger.info('%d unique phosphorylations in RAS Machine',
                len(stmts_unique))

    stmts_unique = pa.combine_related()
    logger.info('%d top-level phosphorylations in RAS Machine',
                len(stmts_unique))

    with open('mapped_unique_phos.pkl', 'wb') as fh:
        pickle.dump(stmts_unique, fh, protocol=2)

    # Filter RAS Machine statements for direct and not hypothesis
    stmts = filter_direct(stmts_unique)
    logger.info('%d direct phosphorylations in RAS Machine', len(stmts))
    stmts = filter_non_hypothesis(stmts)
    logger.info('%d non-hypothesis phosphorylations in RAS Machine',
                len(stmts))

    with open('filtered_phos.pkl', 'wb') as fh:
        pickle.dump(stmts, fh, protocol=2)

    return stmts
示例#5
0
    def preassemble(self, filters=None):
        """Run the preassembly pipeline on the Statements in the model.

        The Statements returned by ``get_statements`` are passed through
        INDRA's GroundingMapper and SiteMapper, optionally filtered, and
        then processed with the Preassembler and the BeliefEngine. The
        results are stored on the instance: ``unique_stmts`` holds the
        Statements returned by ``combine_related`` and ``toplevel_stmts``
        holds those of them that have no ``supports`` entries.

        The following filter options are recognized; the requested ones are
        applied in the order listed here:
        - grounding: require that all Agents in statements are grounded
        - human_only: filter out non-human proteins
        - prior_one: require that at least one Agent is in the prior model
        - model_one: require that at least one Agent is in the incremental
          model
        - prior_all: require that all Agents are in the prior model
        - model_all: require that all Agents are in the incremental model
        Note that model_one -> prior_all are increasingly more restrictive
        options.

        Parameters
        ----------
        filters : Optional[list[str]]
            A list of filter options to apply when choosing the statements.
            See description above for more details. Default: None
        """
        stmts = self.get_statements()
        logger.info("%d raw Statements in total" % len(stmts))

        # Fix grounding
        logger.info("Running grounding map")
        texts_with_grounding = gm.agent_texts_with_grounding(stmts)
        protein_map = gm.protein_map_from_twg(texts_with_grounding)
        # NOTE: this updates the shared gm.default_grounding_map object in
        # place, so the added entries outlive this call.
        gm.default_grounding_map.update(protein_map)
        grounding_mapper = gm.GroundingMapper(gm.default_grounding_map)
        stmts = grounding_mapper.map_agents(stmts, do_rename=True)

        logger.info("%d Statements after grounding map" % len(stmts))

        # Fix sites: only the Statements reported as valid are kept
        site_mapper = SiteMapper(default_site_map)
        stmts, _ = site_mapper.map_sites(stmts)

        logger.info("%d Statements with valid sequence" % len(stmts))

        if filters:
            # Each entry pairs a filter option with the message logged
            # before it runs; the list order is the order of application.
            filter_steps = [
                ("grounding", "Running grounding filter"),
                ("human_only", "Running non-human protein filter"),
            ]
            for rel_key in ("prior_one", "model_one", "prior_all", "model_all"):
                filter_steps.append(
                    (rel_key, "Running %s relevance filter" % rel_key))
            for option, announcement in filter_steps:
                if option not in filters:
                    continue
                logger.info(announcement)
                stmts = self._relevance_filter(stmts, [option])
                logger.info("%s Statements after filter" % len(stmts))

        # Combine duplicates
        logger.info("Preassembling %d Statements" % len(stmts))
        preassembler = Preassembler(hierarchies, stmts)
        self.unique_stmts = preassembler.combine_duplicates()
        logger.info("%d unique Statements" % len(self.unique_stmts))

        # Run BeliefEngine on unique statements
        belief_engine = BeliefEngine()
        belief_engine.set_prior_probs(self.unique_stmts)

        # Build statement hierarchy; top-level Statements are the ones with
        # an empty `supports` attribute
        self.unique_stmts = preassembler.combine_related(return_toplevel=False)
        self.toplevel_stmts = [
            stmt for stmt in self.unique_stmts if not stmt.supports
        ]
        logger.info("%d top-level Statements" % len(self.toplevel_stmts))
        # Run BeliefEngine on hierarchy
        belief_engine.set_hierarchy_probs(self.unique_stmts)