Пример #1
0
def _get_text_for_grounding(stmt, agent_text):
    """Get text context for Adeft disambiguation

    Text sources are tried in order, and the first one yielding non-empty
    text is returned:

    1. Content looked up in the INDRA database from the text refs of the
       statement's first evidence, reduced with ``universal_extract_text``
       using ``agent_text`` as the ``contains`` filter.
    2. The abstract fetched through the pubmed client, if the evidence has
       a PMID.
    3. The evidence text of the statement.

    Any failure in the database step (including the database package not
    being importable) is logged and treated as "no text found".

    Parameters
    ----------
    stmt : py:class:`indra.statements.Statement`
        Statement with agent we seek to disambiguate. Only its first
        evidence is consulted.

    agent_text : str
       Agent text that needs to be disambiguated

    Returns
    -------
    text : str
        Text for Adeft disambiguation. This is whatever the evidence holds
        as its text if neither the database nor PubMed produced anything.
    """
    evidence = stmt.evidence[0]
    # First we will try to get content from the DB
    try:
        from indra_db.util.content_scripts \
            import get_text_content_from_text_refs
        from indra.literature.adeft_tools import universal_extract_text
        # Work on a copy so that looking up text does not modify the
        # text refs stored on the statement's evidence
        refs = dict(evidence.text_refs)
        # Prioritize the pmid attribute if given
        if evidence.pmid:
            refs['PMID'] = evidence.pmid
        logger.debug('Obtaining text for disambiguation with refs: %s', refs)
        content = get_text_content_from_text_refs(refs)
        if not content:
            raise ValueError('Text obtained from DB is empty')
        text = universal_extract_text(content, contains=agent_text)
        if text:
            return text
    except Exception as e:
        # Best effort: any problem here just means we move on to PubMed
        logger.info('Could not get text for disambiguation from DB: %s', e)
    # If that doesn't work, we try PubMed next. Note that an empty (but not
    # None) result from the DB step must also end up here.
    pmid = evidence.pmid
    if pmid:
        from indra.literature import pubmed_client
        logger.debug('Obtaining abstract for disambiguation for PMID%s', pmid)
        text = pubmed_client.get_abstract(pmid)
        if text:
            return text
    # Finally, falling back on the evidence sentence. This is unconditional
    # so that an empty abstract cannot make the function return None.
    logger.info('Falling back on sentence-based disambiguation')
    return evidence.text
Пример #2
0
def _get_text_for_grounding(stmt, agent_text):
    """Get text context for Adeft disambiguation

    Text sources are tried in order, and the first one yielding non-empty
    text is returned:

    1. Content looked up in the INDRA database from the text refs of the
       statement's first evidence, reduced with ``universal_extract_text``
       using ``agent_text`` as the ``contains`` filter.
    2. The abstract fetched through the pubmed client, if the evidence has
       a PMID.
    3. The evidence text of the statement.

    Any failure in the database step (including the database package not
    being importable) is logged and treated as "no text found".

    Parameters
    ----------
    stmt : py:class:`indra.statements.Statement`
        Statement with agent we seek to disambiguate. Only its first
        evidence is consulted.

    agent_text : str
       Agent text that needs to be disambiguated

    Returns
    -------
    text : str
        Text for Adeft disambiguation. This is whatever the evidence holds
        as its text if neither the database nor PubMed produced anything.
    """
    evidence = stmt.evidence[0]
    # First we will try to get content from the DB
    try:
        from indra_db.util.content_scripts \
            import get_text_content_from_text_refs
        from indra.literature.adeft_tools import universal_extract_text
        # Work on a copy so that looking up text does not modify the
        # text refs stored on the statement's evidence
        refs = dict(evidence.text_refs)
        # Prioritize the pmid attribute if given
        if evidence.pmid:
            refs['PMID'] = evidence.pmid
        logger.info('Obtaining text for disambiguation with refs: %s', refs)
        content = get_text_content_from_text_refs(refs)
        text = universal_extract_text(content, contains=agent_text)
        if text:
            return text
    except Exception as e:
        # Best effort: report why the DB lookup failed and move on
        logger.info('Could not get text for disambiguation from DB: %s', e)
    # If that doesn't work, we try PubMed next. Note that an empty (but not
    # None) result from the DB step must also end up here.
    pmid = evidence.pmid
    if pmid:
        from indra.literature import pubmed_client
        logger.info('Obtaining abstract for disambiguation for PMID%s', pmid)
        text = pubmed_client.get_abstract(pmid)
        if text:
            return text
    # Finally, falling back on the evidence sentence. This is unconditional
    # so that an empty abstract cannot make the function return None.
    logger.info('Falling back on sentence-based disambiguation')
    return evidence.text
Пример #3
0
    def _get_text_for_grounding(self, stmt, agent_text):
        """Get text context for Adeft disambiguation

        Text sources are tried in order, and the first one yielding
        non-empty text is returned:

        1. If ``self.has_local_text_db`` is set, the plaintext stored in the
           local text content DB for the evidence's TRID (or, when no TRID
           is given, for the TRID mapped from its PMID). This content is
           returned as is, without filtering on ``agent_text``.
        2. If a text content client is configured, content obtained from
           the INDRA DB for the evidence's text refs, reduced with
           ``universal_extract_text`` using ``agent_text`` as the
           ``contains`` filter.
        3. The abstract fetched through the pubmed client, if the evidence
           has a PMID.
        4. The evidence text of the statement.

        Failures in either database step are logged and treated as
        "no text found".

        Parameters
        ----------
        stmt : py:class:`indra.statements.Statement`
            Statement with agent we seek to disambiguate. Only its first
            evidence is consulted.

        agent_text : str
           Agent text that needs to be disambiguated

        Returns
        -------
        text : str
            Text for Adeft disambiguation. This is whatever the evidence
            holds as its text if no other source produced anything.
        """
        evidence = stmt.evidence[0]
        # First we will try to get content from a local text content DB if
        # available since this is the fastest option
        if self.has_local_text_db:
            try:
                from indra_db_lite import get_plaintexts_for_text_ref_ids, \
                    get_text_ref_ids_for_pmids
                refs = evidence.text_refs
                trid = refs.get('TRID')
                pmid = refs.get('PMID')
                # Without an explicit TRID, try to resolve one from the PMID
                if not trid and pmid:
                    mappings = get_text_ref_ids_for_pmids([int(pmid)])
                    if int(pmid) in mappings:
                        trid = mappings[int(pmid)]
                if trid:
                    text_content = get_plaintexts_for_text_ref_ids([trid])
                    _, content = next(text_content.trid_content_pairs())
                    if content:
                        return content
            except Exception as e:
                logger.info('Could not get text from local DB: %s', e)
        # If the above is not available or fails, we try the INDRA DB
        # if available.
        if self.__tc is not None:
            try:
                from indra.literature.adeft_tools import universal_extract_text
                # Work on a copy so that looking up text does not modify the
                # text refs stored on the statement's evidence
                refs = dict(evidence.text_refs)
                # Prioritize the pmid attribute if given
                if evidence.pmid:
                    refs['PMID'] = evidence.pmid
                logger.debug(
                    'Obtaining text for disambiguation with refs: %s', refs)
                content = self.__tc.get_text_content_from_text_refs(refs)
                if not content:
                    raise ValueError('Text obtained from DB is empty')
                text = universal_extract_text(content, contains=agent_text)
                if text:
                    return text
            except Exception as e:
                logger.info(
                    'Could not get text for disambiguation from DB: %s', e)
        # If that doesn't work, we try PubMed next trying to fetch an
        # abstract. An empty (but not None) result from the DB step must
        # also end up here.
        pmid = evidence.pmid
        if pmid:
            from indra.literature import pubmed_client
            logger.debug(
                'Obtaining abstract for disambiguation for PMID%s', pmid)
            text = pubmed_client.get_abstract(pmid)
            if text:
                return text
        # Finally, falling back on the evidence sentence. This is
        # unconditional so that an empty abstract cannot make the method
        # return None.
        logger.info('Falling back on sentence-based disambiguation')
        return evidence.text
Пример #4
0
if __name__ == '__main__':
    # Collect the statement ids stored for each given shortform, fetch the
    # text content behind those statements, keep only the parts of each
    # article mentioning one of the shortforms, and write the texts and the
    # reference mapping to JSON files under DATA_PATH/texts/<agg_name>/.
    parser = argparse.ArgumentParser(description='Get texts for statements'
                                     ' with agent from a list of shortforms')
    parser.add_argument('vars', nargs='*')
    args = parser.parse_args()
    shortforms = args.vars
    if not shortforms:
        # Without any shortform there is nothing to look up; exit with a
        # usage message instead of failing later on unbound variables.
        parser.error('at least one shortform is required')

    # Union of the statement ids over all shortforms
    all_stmts = set()
    for shortform in shortforms:
        path = os.path.join(DATA_PATH, 'statements',
                            f'{escape_filename(shortform)}_statements.json')
        with open(path, 'r') as f:
            all_stmts.update(json.load(f))

    # Query the content once, after all statement ids have been gathered,
    # rather than once per shortform on a growing set
    ref_dict, text_dict = get_text_content_from_stmt_ids(all_stmts)
    text_dict = {
        text_ref: universal_extract_text(article, contains=shortforms)
        for text_ref, article in text_dict.items()
    }

    # The output location is named after the sorted, escaped shortforms
    agg_name = ':'.join(
        escape_filename(shortform) for shortform in sorted(shortforms)
    )
    dir_path = os.path.join(DATA_PATH, 'texts', agg_name)
    os.makedirs(dir_path, exist_ok=True)
    with open(os.path.join(dir_path, f'{agg_name}_texts.json'), 'w') as f:
        json.dump(text_dict, f)
    with open(os.path.join(dir_path, f'{agg_name}_text_map.json'), 'w') as f:
        json.dump(ref_dict, f)
Пример #5
0
def get_text_content(pmid):
    """Return the extracted text of the content found for a PMID.

    The content is looked up with ``get_text_content_from_text_refs`` using
    the module-level ``db`` and, if anything was found, passed through
    ``universal_extract_text``.

    Parameters
    ----------
    pmid : str
        PMID used as the only text ref for the lookup.

    Returns
    -------
    text : str or None
        Result of ``universal_extract_text`` on the content, or None if the
        lookup produced nothing.
    """
    found = get_text_content_from_text_refs(text_refs={'PMID': pmid}, db=db)
    # Guard clause: nothing to extract from
    if not found:
        return None
    return universal_extract_text(found)