Пример #1
0
def test_filter_emmaa_stmts():
    """Check that metadata filtering drops only the non-matching statement."""
    internal_stmt = EmmaaStatement(stmt, date, search_terms,
                                   {'internal': True})
    external_stmt = EmmaaStatement(stmt, date, search_terms,
                                   {'internal': False})
    # Imitate an older style statement that has no metadata attribute
    legacy_stmt = EmmaaStatement(stmt, date, search_terms)
    del legacy_stmt.metadata
    candidates = [internal_stmt, external_stmt, legacy_stmt]
    kept = filter_emmaa_stmts_by_metadata(candidates, {'internal': True})
    # Only the statement with internal False should have been filtered out
    assert len(kept) == 2
    assert internal_stmt in kept
    assert legacy_stmt in kept
Пример #2
0
def test_model_extend():
    """Check that extend_unique does not add a duplicate statement."""
    evidences = [
        Evidence(pmid='1234', text='abcd', source_api='x'),
        Evidence(pmid='1234', text='abcde', source_api='x'),
        # Built with the same arguments as the first evidence
        Evidence(pmid='1234', text='abcd', source_api='x'),
    ]
    emmaa_sts = []
    for ev in evidences:
        indra_st = Phosphorylation(None, Agent('a'), evidence=ev)
        emmaa_sts.append(
            EmmaaStatement(indra_st, datetime.datetime.now(), []))
    em = EmmaaModel('x', {'search_terms': [], 'ndex': {'network': None}})
    first, *rest = emmaa_sts
    em.add_statements([first])
    em.extend_unique(rest)
    # Of the two remaining statements only one is expected to be added
    assert len(em.stmts) == 2
    # A statement about a different agent is expected to be added
    other_agent_stmt = EmmaaStatement(
        Phosphorylation(None, Agent('b'), evidence=evidences[0]),
        datetime.datetime.now(), [])
    em.extend_unique([other_agent_stmt])
    assert len(em.stmts) == 3
Пример #3
0
def generate_model(model_name):
    """Build a small EmmaaModel from natural language for end-to-end tests.

    Parameters
    ----------
    model_name : str
        Name passed to the EmmaaModel constructor.

    Returns
    -------
    tuple
        The EmmaaModel with the extracted statements added, and the config
        dict the model was created with.
    """
    text = 'BRAF activates MAP2K1. Active MAP2K1 activates MAPK1.'
    processor = trips.process_text(text)
    emmaa_stmts = []
    for indra_stmt in processor.statements:
        emmaa_stmts.append(
            EmmaaStatement(indra_stmt, datetime.datetime.now(), 'MAPK1'))
    # NOTE: a fixed NDEx network id is used here; no CX model is assembled
    # or uploaded by this function.
    ndex_config = {'network': 'a08479d1-24ce-11e9-bb6a-0ac135e8bacf'}
    search_term = {
        'db_refs': {'HGNC': '20974'},
        'name': 'MAPK1',
        'search_term': 'MAPK1',
        'type': 'gene',
    }
    config_dict = {'ndex': ndex_config, 'search_terms': [search_term]}
    emmaa_model = EmmaaModel(model_name, config_dict)
    emmaa_model.add_statements(emmaa_stmts)
    return emmaa_model, config_dict
Пример #4
0
def read_elsevier_eidos_search_terms(piis_to_terms):
    """Return EmmaaStatements extracted from the texts of the given PIIs.

    Parameters
    ----------
    piis_to_terms : dict
        A dict whose keys are PIIs and whose values are the search terms
        that produced them.

    Returns
    -------
    list[:py:class:`emmaa.model.EmmaaStatement`]
        A list of EmmaaStatements extracted from the given PIIs. Every
        evidence of each statement gets its PII stored under the 'pii'
        annotation key.
    """
    pii_list = list(piis_to_terms)
    # A single timestamp is shared by all statements of this run
    timestamp = datetime.datetime.utcnow()
    stmts_by_pii = process_texts(read_piis(pii_list))
    estmts = []
    for pii, stmts in stmts_by_pii.items():
        for stmt in stmts:
            # Record which paper each piece of evidence came from
            for evid in stmt.evidence:
                evid.annotations['pii'] = pii
            estmts.append(EmmaaStatement(stmt, timestamp, piis_to_terms[pii]))
    return estmts
Пример #5
0
def read_db_ids_search_terms(id_search_terms, id_type):
    """Return EmmaaStatements built from database statements for given IDs.

    Parameters
    ----------
    id_search_terms : dict
        A dict whose keys are paper IDs and whose values are the search
        terms that produced them.
    id_type : str
        The type of the given IDs; passed on as `id_type` to the database
        query.

    Returns
    -------
    list[:py:class:`emmaa.model.EmmaaStatement`]
        A list of EmmaaStatements extracted from the given IDs.
    """
    paper_ids = list(id_search_terms)
    # A single timestamp is shared by all statements of this run
    timestamp = datetime.datetime.utcnow()
    primary_db = get_primary_db()
    stmt_jsons_by_id = get_raw_stmt_jsons_from_papers(
        paper_ids, id_type=id_type, db=primary_db)
    estmts = []
    for paper_id, stmt_jsons in stmt_jsons_by_id.items():
        estmts.extend(
            EmmaaStatement(stmt, timestamp, id_search_terms[paper_id])
            for stmt in stmts_from_json(stmt_jsons))
    return estmts
Пример #6
0
def read_db_pmid_search_terms(pmid_search_terms):
    """Return EmmaaStatements built from database statements for given PMIDs.

    Parameters
    ----------
    pmid_search_terms : dict
        A dict whose keys are PMIDs and whose values are the search terms
        that produced them.

    Returns
    -------
    list[:py:class:`emmaa.model.EmmaaStatement`]
        A list of EmmaaStatements extracted from the given PMIDs.
    """
    pmid_list = list(pmid_search_terms)
    # A single timestamp is shared by all statements of this run
    timestamp = datetime.datetime.utcnow()
    primary_db = get_primary_db()
    # Statements are requested with preassembled=False
    stmts_by_pmid = get_statements_by_paper(
        pmid_list, id_type='pmid', db=primary_db, preassembled=False)
    estmts = []
    for pmid, stmts in stmts_by_pmid.items():
        estmts.extend(
            EmmaaStatement(stmt, timestamp, pmid_search_terms[pmid])
            for stmt in stmts)
    return estmts
Пример #7
0
def test_model_json():
    """Test the json structure and content of EmmaaModel.to_json() output."""
    indra_stmts = [
        Activation(Agent('BRAF', db_refs={'HGNC': '20974'}),
                   Agent('MAP2K1'),
                   evidence=[Evidence(text='BRAF activates MAP2K1.')]),
        Activation(Agent('MAP2K1',
                         activity=ActivityCondition('activity', True)),
                   Agent('MAPK1'),
                   evidence=[Evidence(text='Active MAP2K1 activates MAPK1.')]),
    ]
    st = SearchTerm('gene', 'MAP2K1', db_refs={}, search_term='MAP2K1')
    emmaa_stmts = [
        EmmaaStatement(stmt, datetime.datetime.now(), [st])
        for stmt in indra_stmts
    ]
    config_dict = {
        'ndex': {
            'network': 'a08479d1-24ce-11e9-bb6a-0ac135e8bacf'
        },
        'search_terms': [{
            'db_refs': {
                'HGNC': '20974'
            },
            'name': 'MAPK1',
            'search_term': 'MAPK1',
            'type': 'gene'
        }]
    }
    emmaa_model = EmmaaModel('test', config_dict)
    emmaa_model.add_statements(emmaa_stmts)

    emmaa_model_json = emmaa_model.to_json()

    # Test json structure
    assert emmaa_model_json['name'] == 'test'
    assert isinstance(emmaa_model_json['stmts'], list)
    assert emmaa_model_json['ndex_network'] == \
        'a08479d1-24ce-11e9-bb6a-0ac135e8bacf'

    # Test config
    first_term = emmaa_model_json['search_terms'][0]
    assert first_term['type'] == 'gene'
    assert first_term['db_refs'] == {'HGNC': '20974'}

    # Test json statements; each evidence text is checked exactly once
    # (the original repeated the first check verbatim).
    stmt_jsons = [entry['stmt'] for entry in emmaa_model_json['stmts'][:2]]
    assert 'BRAF activates MAP2K1.' == stmt_jsons[0]['evidence'][0]['text']
    assert 'Active MAP2K1 activates MAPK1.' == \
        stmt_jsons[1]['evidence'][0]['text']
    assert stmt_jsons[0]['subj']['name'] == 'BRAF'
    assert stmt_jsons[1]['subj']['name'] == 'MAP2K1'
    assert stmt_jsons[1]['obj']['name'] == 'MAPK1'

    # Need hashes to be strings so that javascript can read them
    assert isinstance(stmt_jsons[0]['evidence'][0]['source_hash'], str)
Пример #8
0
 def make_gene_statements(self):
     """Set self.stmts to EmmaaStatements obtained for the gene list.

     Names of search terms whose type is 'drug' are passed along with the
     gene list when fetching the INDRA Statements.
     """
     drug_names = []
     for term in self.search_terms:
         if term.type == 'drug':
             drug_names.append(term.name)
     fetched = get_stmts_for_gene_list(self.gene_list, drug_names)
     # Each statement is wrapped with its own timestamp and no search terms
     self.stmts = [
         EmmaaStatement(indra_stmt, datetime.datetime.now(), [])
         for indra_stmt in fetched
     ]
Пример #9
0
def create_model(relevance=None, paper_ids=None):
    """Create a two-statement EmmaaModel named 'test' for use in tests.

    Parameters
    ----------
    relevance : Optional[str]
        If truthy, a 'filter_relevance' step with this value as its policy
        is appended to the assembly configuration.
    paper_ids : optional
        Passed as the third positional argument to the EmmaaModel
        constructor.

    Returns
    -------
    EmmaaModel
        The model with both statements added.
    """
    braf_map2k1 = Activation(
        Agent('BRAF', db_refs={'HGNC': '1097'}),
        Agent('MAP2K1', db_refs={'HGNC': '6840'}),
        evidence=[Evidence(text='BRAF activates MAP2K1.',
                           source_api='assertion',
                           text_refs={'TRID': '1234'})])
    map2k1_mapk1 = Activation(
        Agent('MAP2K1', db_refs={'HGNC': '6840'},
              activity=ActivityCondition('activity', True)),
        Agent('MAPK1', db_refs={'HGNC': '6871'}),
        evidence=[Evidence(text='Active MAP2K1 activates MAPK1.',
                           source_api='assertion',
                           text_refs={'TRID': '2345'})])
    st = SearchTerm('gene', 'MAP2K1', db_refs={}, search_term='MAP2K1')
    # Both statements are internal; only the second is marked as curated
    stmts_with_metadata = [
        (braf_map2k1, {'internal': True, 'curated': False}),
        (map2k1_mapk1, {'internal': True, 'curated': True}),
    ]
    emmaa_stmts = [
        EmmaaStatement(indra_stmt, datetime.datetime.now(), [st], metadata)
        for indra_stmt, metadata in stmts_with_metadata
    ]
    assembly_steps = [
        {'function': 'filter_no_hypothesis'},
        {'function': 'map_grounding'},
        {'function': 'filter_grounded_only'},
        {'function': 'filter_human_only'},
        {'function': 'map_sequence'},
        {'function': 'run_preassembly',
         'kwargs': {'return_toplevel': False}},
    ]
    if relevance:
        assembly_steps.append(
            {'function': 'filter_relevance', 'kwargs': {'policy': relevance}})
    config_dict = {
        'ndex': {'network': 'a08479d1-24ce-11e9-bb6a-0ac135e8bacf'},
        'search_terms': [{'db_refs': {'HGNC': '20974'}, 'name': 'MAPK1',
                          'search_term': 'MAPK1', 'type': 'gene'}],
        'human_readable_name': 'Test Model',
        'test': {
            'statement_checking': {'max_path_length': 5, 'max_paths': 1},
            'test_corpus': 'simple_tests',
            'mc_types': ['pysb', 'pybel', 'signed_graph', 'unsigned_graph']},
        'assembly': assembly_steps}
    emmaa_model = EmmaaModel('test', config_dict, paper_ids)
    emmaa_model.add_statements(emmaa_stmts)
    return emmaa_model
Пример #10
0
def upload_prior(ctype, config):
    """Build a model from pickled prior statements and update it to NDEx.

    Parameters
    ----------
    ctype : str
        Name of the model; also the name of the directory under
        ``../models`` that holds ``prior_stmts.pkl``.
    config : dict
        Configuration passed to the EmmaaModel constructor.
    """
    prior_path = f'../models/{ctype}/prior_stmts.pkl'
    with open(prior_path, 'rb') as fh:
        prior_stmts = pickle.load(fh)
    # Wrap every prior statement with its own timestamp and no search terms
    estmts = []
    for prior_stmt in prior_stmts:
        estmts.append(
            EmmaaStatement(prior_stmt, datetime.datetime.now(), []))
    model = EmmaaModel(ctype, config)
    model.add_statements(estmts)
    model.update_to_ndex()
Пример #11
0
def get_emmaa_statements(stmts, gene_names):
    """Wrap INDRA Statements as EmmaaStatements with an 'internal' flag.

    Parameters
    ----------
    stmts : iterable
        INDRA Statements to wrap.
    gene_names : container of str
        Names that count as genes of the model.

    Returns
    -------
    list
        One EmmaaStatement per input statement, each with metadata
        ``{'internal': bool}``.
    """
    estmts = []
    for stmt in stmts:
        timestamp = datetime.datetime.now()
        agent_names = [agent.name for agent in stmt.real_agent_list()]
        # If all the agents are gene names, this is an internal statement.
        # We classify any statements with drugs in them as external.
        internal = not any(name not in gene_names for name in agent_names)
        estmts.append(
            EmmaaStatement(stmt, timestamp, [], {'internal': internal}))
    return estmts
Пример #12
0
def test_filter_relevance():
    """Check assembly results with no, 'prior_one' and 'prior_all' filters."""
    config_dict = {
        'ndex': {
            'network': 'a08479d1-24ce-11e9-bb6a-0ac135e8bacf'
        },
        'search_terms': [{
            'db_refs': {
                'HGNC': '20974'
            },
            'name': 'MAPK1',
            'search_term': 'MAPK1',
            'type': 'gene'
        }]
    }
    braf_map2k1 = Activation(
        Agent('BRAF', db_refs={'HGNC': '20974'}),
        Agent('MAP2K1'),
        evidence=[Evidence(text='BRAF activates MAP2K1.',
                           source_api='assertion')])
    map2k1_mapk1 = Activation(
        Agent('MAP2K1', activity=ActivityCondition('activity', True)),
        Agent('MAPK1'),
        evidence=[Evidence(text='Active MAP2K1 activates MAPK1.',
                           source_api='assertion')])
    st = SearchTerm('gene', 'MAP2K1', db_refs={}, search_term='MAP2K1')
    emmaa_stmts = [
        EmmaaStatement(indra_stmt, datetime.datetime.now(), [st])
        for indra_stmt in (braf_map2k1, map2k1_mapk1)
    ]

    def assemble():
        # Build a fresh model from the current config and assemble it
        model = EmmaaModel('test', config_dict)
        model.extend_unique(emmaa_stmts)
        model.run_assembly()
        return model.assembled_stmts

    # Try no filter first
    assembled = assemble()
    assert len(assembled) == 2, assembled

    # Next do a prior_one filter
    config_dict['assembly'] = {'filter_relevance': 'prior_one'}
    assembled = assemble()
    assert len(assembled) == 1, assembled
    assert assembled[0].obj.name == 'MAPK1'

    # Next do a prior_all filter
    config_dict['assembly'] = {'filter_relevance': 'prior_all'}
    assembled = assemble()
    assert len(assembled) == 0
Пример #13
0
    def make_gene_statements(self) -> List[EmmaaStatement]:
        """Return EmmaaStatements for the gene list, building them once.

        If self.stmts is already non-empty it is returned as is. Otherwise
        INDRA Statements are fetched for the gene list (together with the
        names of search terms of type 'drug'), wrapped as EmmaaStatements,
        stored on self.stmts and returned.

        Returns
        -------
        list of EmmaaStatement
            The statements, each with metadata ``{'internal': bool}``.
        """
        if not self.stmts:
            drug_names = []
            for term in self.search_terms:
                if term.type == 'drug':
                    drug_names.append(term.name)
            fetched = get_stmts_for_gene_list(self.gene_list, drug_names)
            estmts = []
            for indra_stmt in fetched:
                timestamp = datetime.datetime.now()
                agent_names = [agent.name
                               for agent in indra_stmt.real_agent_list()]
                # If all the agents are gene names, this is an internal
                # statement. We classify any statements with drugs in them
                # as external.
                internal = not any(name not in self.gene_list
                                   for name in agent_names)
                estmts.append(EmmaaStatement(indra_stmt, timestamp, [],
                                             {'internal': internal}))
            self.stmts = estmts
        return self.stmts
Пример #14
0
    def get_statements(self, mode='all', batch_size=100):
        """Return EMMAA Statements for this prior's literature set.

        If self.stmts is already non-empty it is returned without running
        any search. Otherwise PubMed is searched with this prior's search
        terms, raw statements are fetched for the resulting PMIDs, and each
        one is appended to self.stmts as an EmmaaStatement.

        Parameters
        ----------
        mode : 'all' or 'distilled'
            The 'distilled' mode makes sure that the "best", non-redundant
            set of raw statements are found across potentially redundant text
            contents and reader versions. The 'all' mode doesn't do such
            distillation but is significantly faster.
        batch_size : Optional[int]
            Determines how many PMIDs to fetch statements for in each
            iteration. Default: 100.

        Returns
        -------
        list of EmmaaStatement
            A list of EMMAA Statements corresponding to extractions from
            the subset of literature defined by this prior's search terms.
        """
        if self.stmts:
            return self.stmts
        terms_to_pmids = EmmaaModel.search_pubmed(
            search_terms=self.search_terms, date_limit=None)
        # Invert the mapping: for each PMID collect the terms that found it
        pmids_to_terms = {}
        for term, pmids in terms_to_pmids.items():
            for pmid in pmids:
                pmids_to_terms.setdefault(pmid, []).append(term)
        raw_statements_by_pmid = get_raw_statements_for_pmids(
            set(pmids_to_terms), mode=mode, batch_size=batch_size)
        # A single timestamp is shared by all statements of this call
        timestamp = datetime.datetime.now()
        for pmid, raw_stmts in raw_statements_by_pmid.items():
            self.stmts.extend(
                EmmaaStatement(raw_stmt, timestamp, pmids_to_terms[pmid],
                               {'internal': True})
                for raw_stmt in raw_stmts)
        return self.stmts
Пример #15
0
def update_cancer(cancer_type):
    """Update the model for the given cancer.

    A JSON config file must be present for the given cancer type, located in
    the models/<cancer_type>/config.json. The prior statements are read from
    models/<cancer_type>/prior_stmts.pkl, added to a new EmmaaModel and the
    model is saved to S3.

    Parameters
    ----------
    cancer_type : str
        A short string which is the name of the cancer, and corresponds to a
        directory in the models directory, as described above.
    """
    print(cancer_type)
    # NOTE: unpickling can execute arbitrary code; the prior statements file
    # must come from a trusted source.
    with open(f'models/{cancer_type}/prior_stmts.pkl', 'rb') as fh:
        stmts = pickle.load(fh)
    # Read the config inside a context manager so the file handle is closed
    # deterministically instead of being left to the garbage collector.
    with open(f'models/{cancer_type}/config.json', 'r') as fh:
        config = json.load(fh)
    em = EmmaaModel(cancer_type, config)
    ess = [EmmaaStatement(st, datetime.datetime.now(), []) for st in stmts]
    em.add_statements(ess)
    em.save_to_s3()
Пример #16
0
def test_direct_path_tests():
    """Check path lengths found with and without direct paths allowed."""
    checked_mc_types = ('pysb', 'signed_graph', 'unsigned_graph')
    model = create_model()
    # Add a direct BRAF -> MAPK1 statement to the model and test for it
    stmt = Activation(Agent('BRAF', db_refs={'HGNC': '1097'}),
                      Agent('MAPK1', db_refs={'UP': 'P28482'}))
    metadata = {'internal': True, 'curated': False}
    model.stmts.append(
        EmmaaStatement(stmt, datetime.datetime.now(), [], metadata))
    tests = [StatementCheckingTest(stmt)]
    mm = ModelManager(model)
    tm = TestManager([mm], tests)
    tm.make_tests(ScopeTestConnector())

    tm.run_tests(allow_direct=True)
    for mc_type in checked_mc_types:
        direct_res = mm.mc_types[mc_type]['test_results'][0]
        print(direct_res.paths)
        first_path = direct_res.paths[0]
        assert len(first_path) == 2, (mc_type, first_path)  # 1 edge

    tm.run_tests(allow_direct=False)
    for mc_type in checked_mc_types:
        # Look at the second test result here
        indirect_res = mm.mc_types[mc_type]['test_results'][1]
        first_path = indirect_res.paths[0]
        assert len(first_path) == 3, (mc_type, first_path)  # 2 edges
Пример #17
0
def read_pmid_search_terms(pmid_search_terms):
    """Return EmmaaStatements obtained by reading the given PMIDs.

    Parameters
    ----------
    pmid_search_terms : dict
        A dict whose keys are PMIDs and whose values are the search terms
        that produced them.

    Returns
    -------
    list[:py:class:`emmaa.model.EmmaaStatement`]
        A list of EmmaaStatements extracted from the given PMIDs.
    """
    pmid_list = list(pmid_search_terms)
    # The same timestamp is given to the reader and to every statement
    timestamp = datetime.datetime.utcnow()
    stmts_by_pmid = read_pmids(pmid_list, timestamp)
    estmts = []
    for pmid, stmts in stmts_by_pmid.items():
        estmts.extend(
            EmmaaStatement(stmt, timestamp, pmid_search_terms[pmid])
            for stmt in stmts)
    return estmts