Пример #1
0
    def send_model_diagnoses(self, res):
        """Collect the diagnostic messages implied by a model diagnosis.

        The messages are only collected and returned here; they are not
        sent. Telling the user is the responsibility of the BA.

        Parameters
        ----------
        res : dict
            Diagnosis result. The keys read here are 'explanation_path',
            'stmt_suggestions' and 'stmt_corrections'; all are optional.

        Returns
        -------
        list of str
            HTML-formatted messages in the order explanation, suggestions,
            correction. Empty if there is nothing to report.
        """
        diagnostic_tells = []

        # EXPLANATION
        # Only report an explanation if we haven't already sent one
        expl_path = res.get('explanation_path')
        if expl_path and not self.have_explanation:
            path_str = EnglishAssembler(expl_path).make_model()
            goal_str = EnglishAssembler([self.mra.explain]).make_model()
            if path_str and goal_str:
                # The last character of the goal text is dropped before it
                # is embedded in the message
                diagnostic_tells.append(
                    'Our model can now explain how %s: <i>%s</i>' %
                    (goal_str[:-1], path_str))

        # SUGGESTIONS
        # Render every suggested statement as one bullet of an HTML list
        suggs = res.get('stmt_suggestions')
        if suggs:
            bullets = ''.join(
                '<li>%s</li>' % EnglishAssembler([stmt]).make_model()
                for stmt in suggs)
            say = 'I have some suggestions on how to complete our model.'
            say += ' We could try modeling one of:<br>'
            say += '<ul>%s</ul>' % bullets
            diagnostic_tells.append(say)

        # CORRECTIONS
        # Only the first correction is reported
        corrs = res.get('stmt_corrections')
        if corrs:
            corr_str = EnglishAssembler([corrs[0]]).make_model()
            say = 'It looks like a required activity may be missing,'
            say += ' say \'%s\' to add it.' % corr_str
            diagnostic_tells.append(say)

        return diagnostic_tells
def indraStatementToDict(stmt):
    """Summarize an INDRA Statement as a plain dictionary.

    Parameters
    ----------
    stmt : Statement
        The Statement to summarize.

    Returns
    -------
    dict
        'text' is the English assembly of the Statement, 'pmid' is the PMID
        of its first evidence (None if the Statement has no evidence) and
        'type' is the value returned by ``get_type``.
    """
    txt = EnglishAssembler([stmt]).make_model()
    # A Statement without evidence has no PMID to report; fall back to None
    # instead of failing on evidence[0]
    pmid = stmt.evidence[0].pmid if stmt.evidence else None
    _type = get_type(stmt)

    return {'pmid': pmid, 'text': txt, 'type': _type}
Пример #3
0
def _format_stmt_text(stmt):
    """Return the agent-tagged text for a Statement.

    The English assembly is used when it is non-empty, tagged with the
    assembler's agents for the first statement. Otherwise the text is
    ``str(stmt)``, tagged with the Statement's own agent list.
    """
    assembler = EnglishAssembler([stmt])
    sentence = assembler.make_model()
    if sentence:
        return tag_agents(sentence, assembler.stmt_agents[0])
    # Nothing could be assembled: fall back to the plain string form
    return tag_agents(str(stmt), stmt.agent_list())
Пример #4
0
def assemble_english(stmts):
    """Return English sentences for Statements without the final period.

    Each Statement is assembled on its own. A sentence is kept only if it
    is non-empty and ends with a period, so the returned list can be
    shorter than ``stmts``.
    """
    sentences = []
    for stmt in stmts:
        sentence = EnglishAssembler([stmt]).make_model()
        if not sentence or sentence[-1] != '.':
            # Empty or unterminated text is left out of the result
            continue
        sentences.append(sentence[:-1])
    return sentences
Пример #5
0
def assemble_english(stmts):
    """Assemble each Statement into English and strip the final period.

    Only non-empty sentences that end with a period are returned, so the
    result may contain fewer entries than ``stmts``.
    """
    # Assemble lazily, one Statement at a time and in input order
    assembled = (EnglishAssembler([stmt]).make_model() for stmt in stmts)
    return [text[:-1] for text in assembled if text and text[-1] == '.']
Пример #6
0
def make_english_output(results, model, stmts):
    """Print an English explanation for each result, then the references.

    Parameters
    ----------
    results : iterable of tuple
        Each entry unpacks as ``(source, target, polarity, value,
        found_path, paths, flag)``. Only the first path in ``paths`` is
        reported; a path is a sequence of ``(rule_name, sign)`` pairs.
    model : object
        Model whose ``rules`` are matched by name against the path rules;
        it is also passed to ``_stmt_from_rule``.
    stmts : list
        Statements passed through to ``_stmt_from_rule``.
    """
    citations = {}
    citation_count = 1
    for source, target, polarity, value, found_path, paths, flag in results:
        verb = 'increase' if polarity == 'positive' else 'decrease'
        cond = 'How does treatment with %s %s %s?' % (source, verb, target)
        print(cond)
        print('=' * len(cond))
        sentences = []
        if paths:
            for i, (path_rule, sign) in enumerate(paths[0]):
                for rule in model.rules:
                    if rule.name != path_rule:
                        continue
                    stmt = _stmt_from_rule(model, path_rule, stmts)
                    if i == 0:
                        sentences.append(
                            '%s is a target of %s.' %
                            (stmt.agent_list()[0].name, source))

                    # Make citations: each PMID gets a number the first
                    # time it is seen, and keeps it afterwards
                    cit_nums = set()
                    for ev in stmt.evidence:
                        if not ev.pmid:
                            continue
                        if ev.pmid not in citations:
                            citations[ev.pmid] = citation_count
                            citation_count += 1
                        cit_nums.add(citations[ev.pmid])
                    if cit_nums:
                        cit_str = ' [%s]' % (','.join(
                            [str(c) for c in sorted(cit_nums)]))
                    else:
                        cit_str = ''
                    # Insert the citations before the final character of
                    # the assembled sentence
                    sentence = EnglishAssembler([stmt]).make_model()
                    sentences.append(sentence[:-1] + cit_str + '.')
        if sentences:
            sentences[-1] = sentences[-1][:-1] + \
                ', which is measured by %s.' % target
            text = ' '.join(sentences)
            print('INDRA\'s hypothesis: ' + text)
        elif paths or found_path:
            # Either no path was given but one is known to exist, or the
            # given path produced no sentences (e.g. it is empty or none of
            # its rules are in the model); indexing sentences[-1] in the
            # latter case used to raise IndexError
            print('INDRA determined that there exists an explanation but'
                  ' it is intractable to reconstruct.')
        else:
            print('INDRA couldn\'t find an explanation for this observation.')
        print('\n')
    # List the references in order of their citation number
    references = 'References\n==========\n'
    for pmid, cit_num in sorted(citations.items(), key=lambda x: x[1]):
        references += ('[%d] https://www.ncbi.nlm.nih.gov/pubmed/%s\n' %
                       (cit_num, pmid))
    print(references)
Пример #7
0
    def send_model_diagnoses(self, res):
        """Tell the user about explanations, suggestions and corrections.

        Parameters
        ----------
        res : dict
            Diagnosis result. The keys read here are 'explanation_path',
            'stmt_suggestions' and 'stmt_corrections'; all are optional.
            Each one that is present and non-empty produces one SPOKEN
            message sent through ``self.tell``.
        """
        def speak(text):
            # Wrap the text in a SPOKEN message and send it
            msg = KQMLList('SPOKEN')
            msg.sets('WHAT', text)
            self.tell(msg)

        # Explanation: only sent if we haven't already sent one
        explanation = res.get('explanation_path')
        if explanation and not self.have_explanation:
            path_str = EnglishAssembler(explanation).make_model()
            goal_str = EnglishAssembler([self.mra.explain]).make_model()
            if path_str and goal_str:
                speak('Our model can now explain how %s: <i>%s</i>' %
                      (goal_str[:-1], path_str))

        # Suggestions: one HTML list item per suggested statement
        suggestions = res.get('stmt_suggestions')
        if suggestions:
            bullets = ['<li>%s</li>' % EnglishAssembler([stmt]).make_model()
                       for stmt in suggestions]
            speak('I have some suggestions on how to complete our model.' +
                  ' We could try modeling one of:<br>' +
                  '<ul>%s</ul>' % ''.join(bullets))

        # Corrections: only the first one is reported
        corrections = res.get('stmt_corrections')
        if corrections:
            revised = EnglishAssembler([corrections[0]]).make_model()
            speak('It looks like a required activity is missing,' +
                  ' consider revising to <i>%s</i>' % revised)
Пример #8
0
def report_paths(scored_paths, model, stmts, cell_line):
    """Report paths for a specific cell line.

    For each drug, print a titled English explanation of its first scored
    path with numbered citations. After all drugs, print the references.

    Parameters
    ----------
    scored_paths : dict
        Maps a drug name to a list of ``(path, score)`` pairs. Only the
        first pair of each drug is reported.
    model, stmts
        Passed through to ``stmts_from_path`` to get a path's Statements.
    cell_line : str
        Cell line named in the title. 'C32' is reported as a decrease of
        the readout, any other value as an increase.
    """
    citations = {}
    citation_count = 1
    ab_name = 'Total c-Jun'
    pol = 'decreased' if cell_line == 'C32' else 'increased'
    for drug, paths in scored_paths.items():
        for path, score in paths[:1]:
            title = 'How does %s treatment result in %s %s in %s cells?' % \
                (drug, pol, ab_name, cell_line)
            print(title)
            print('=' * len(title))
            path_stmts = stmts_from_path(path, model, stmts)
            sentences = []
            for i, stmt in enumerate(path_stmts):
                if i == 0:
                    target = stmt.agent_list()[0].name
                    sentences.append('%s is a target of %s.' % (target, drug))
                # Make citations: each PMID gets a number the first time it
                # is seen, and keeps it afterwards
                cit_nums = set()
                for ev in stmt.evidence:
                    if not ev.pmid:
                        continue
                    if ev.pmid not in citations:
                        citations[ev.pmid] = citation_count
                        citation_count += 1
                    cit_nums.add(citations[ev.pmid])
                if cit_nums:
                    cit_str = ' [%s]' % (
                        ','.join([str(c) for c in sorted(cit_nums)]))
                else:
                    cit_str = ''
                # Insert the citations before the final character of the
                # assembled sentence
                sentence = EnglishAssembler([stmt]).make_model()
                sentences.append(sentence[:-1] + cit_str + '.')
            # A path without Statements leaves nothing to extend; indexing
            # sentences[-1] in that case used to raise IndexError
            if sentences:
                sentences[-1] = sentences[-1][:-1] + \
                    ', which is measured by %s.' % ab_name
            print(' '.join(sentences))
            print()
    # List the references in order of their citation number
    references = 'References\n==========\n'
    for pmid, cit_num in sorted(citations.items(), key=lambda x: x[1]):
        references += ('[%d] https://www.ncbi.nlm.nih.gov/pubmed/%s\n' %
                       (cit_num, pmid))
    print(references)
Пример #9
0
    def respond_describe_model(self, content):
        """Describe a stored model in natural language.

        The model is looked up by the ID obtained from ``content`` and
        rendered with the EnglishAssembler.

        Returns
        -------
        KQMLList
            A SUCCESS message whose 'description' is the assembled text.
        """
        # Look up the requested model
        requested_id = self._get_model_id(content)
        stmts = self.mra.get_model_by_id(requested_id)

        # Render it as text
        description = EnglishAssembler(stmts).make_model()

        # Wrap the text in the reply to the BA
        reply = KQMLList('SUCCESS')
        reply.sets('description', description)
        return reply
Пример #10
0
    def respond_describe_model(self, content):
        """Return a SUCCESS reply carrying the model's English description.

        The model ID is taken from ``content``, the model is fetched from
        the MRA and then assembled into English text, which is stored under
        'description' in the reply.
        """
        mid = self._get_model_id(content)
        model_stmts = self.mra.get_model_by_id(mid)
        text = EnglishAssembler(model_stmts).make_model()

        success = KQMLList('SUCCESS')
        success.sets('description', text)
        return success
Пример #11
0
def assemble_english():
    """Assemble each statement in the request body into an English sentence.

    The request body is decoded as UTF-8 JSON and its 'statements' entry is
    deserialized with ``stmts_from_json``. An OPTIONS request gets an empty
    dict.

    Returns
    -------
    dict
        ``{'sentences': {statement UUID: English sentence}}``.
    """
    if request.method == 'OPTIONS':
        return {}
    raw_body = request.body.read().decode('utf-8')
    payload = json.loads(raw_body)
    stmts = stmts_from_json(payload.get('statements'))
    sentences = {}
    # Every statement gets its own assembler, so each sentence stands alone
    for stmt in stmts:
        assembler = EnglishAssembler()
        assembler.add_statements([stmt])
        sentences[stmt.uuid] = assembler.make_model()
    return {'sentences': sentences}
Пример #12
0
def assemble_english():
    """Return an English sentence for every statement sent in the request.

    OPTIONS requests are answered with an empty dict. Otherwise the UTF-8
    JSON body's 'statements' entry is turned into Statements, each of which
    is assembled separately; the result maps Statement UUIDs to sentences
    under the 'sentences' key.
    """
    if request.method == 'OPTIONS':
        return {}
    body = json.loads(request.body.read().decode('utf-8'))
    statements = stmts_from_json(body.get('statements'))
    by_uuid = {}
    for statement in statements:
        english = EnglishAssembler()
        english.add_statements([statement])
        sentence = english.make_model()
        by_uuid[statement.uuid] = sentence
    return {'sentences': by_uuid}
Пример #13
0
 def _format_stmt_text(stmt):
     """Return the Statement's text with its agents wrapped in links.

     The text is the English assembly of the Statement, or ``str(stmt)`` if
     the assembly is empty. Every occurrence of the name of an agent for
     which ``id_url`` returns a URL is marked for tagging with an anchor.
     """
     text = EnglishAssembler([stmt]).make_model() or str(stmt)
     spans = []
     for agent in stmt.agent_list():
         # Agents without a name or without a URL cannot be linked
         if agent is None or not agent.name:
             continue
         link = id_url(agent)
         if link is None:
             continue
         open_tag = "<a href='%s'>" % link
         close_tag = "</a>"
         # FIXME: the EnglishAssembler capitalizes the first letter of
         # each sentence. In some cases this causes no match here
         # and no agent links are produced.
         for match in re.finditer(re.escape(agent.name), text):
             begin = match.start()
             spans.append((begin, begin + len(agent.name), agent.name,
                           open_tag, close_tag))
     return tag_text(text, spans)
Пример #14
0
 def _format_stmt_text(stmt):
     """Build the link-tagged text of a Statement.

     Uses the English assembly of the Statement, falling back to
     ``str(stmt)`` when nothing is assembled. Each agent that has a name
     and a URL from ``id_url`` contributes one tag per match of its name
     in the text.
     """
     sentence = EnglishAssembler([stmt]).make_model()
     if not sentence:
         sentence = str(stmt)

     def name_spans(agent, href):
         # One (start, end, name, opening tag, closing tag) entry per match
         opening = "<a href='%s'>" % href
         closing = "</a>"
         # FIXME: the EnglishAssembler capitalizes the first letter of
         # each sentence. In some cases this causes no match here
         # and no agent links are produced.
         return [(hit.start(), hit.start() + len(agent.name), agent.name,
                  opening, closing)
                 for hit in re.finditer(re.escape(agent.name), sentence)]

     tags = []
     for candidate in stmt.agent_list():
         if candidate is None or not candidate.name:
             continue
         href = id_url(candidate)
         if href is not None:
             tags += name_spans(candidate, href)
     return tag_text(sentence, tags)
Пример #15
0
def get_annotation_text(stmt, annotate_agents=True):
    """Return the English text of a Statement, optionally linking agents.

    Parameters
    ----------
    stmt : Statement
        The Statement to assemble into English.
    annotate_agents : bool
        If True (the default), each agent for which ``get_grounding``
        returns a namespace is replaced in the text by a ``[name](url)``
        link to its identifiers URL.

    Returns
    -------
    str
        The assembled text, with links inserted if requested.
    """
    ea = EnglishAssembler(stmts=[stmt])
    annotation_text = ea.make_model()
    if annotate_agents:
        # One (start coordinate, net length change) entry per link inserted
        # so far
        inserts = []
        for agent_wc in ea.stmt_agents[0]:
            # Apply every earlier insertion that begins before this agent.
            # Presumably update_coords shifts the agent's coords by the
            # given amount -- TODO confirm against its definition. Note
            # that the comparison sees coords already updated by the
            # preceding iterations of this inner loop.
            for insert_begin, insert_len in inserts:
                if insert_begin < agent_wc.coords[0]:
                    agent_wc.update_coords(insert_len)
            db_ns, db_id = get_grounding(agent_wc.db_refs, grounding_ns)
            # Agents without a grounding namespace stay as plain text
            if not db_ns:
                continue
            identifiers_url = \
                identifiers.get_identifiers_url(db_ns, db_id)
            grounding_text = '[%s](%s)' % (agent_wc.name, identifiers_url)
            # Net growth of the text: length of the link minus the length
            # of the span (coords[0] to coords[1]) that it replaces
            insert_len = len(grounding_text) - agent_wc.coords[1] + \
                agent_wc.coords[0]
            inserts.append((agent_wc.coords[0], insert_len))
            # Splice the link in place of the agent's span
            before_part = annotation_text[:agent_wc.coords[0]]
            after_part = annotation_text[agent_wc.coords[1]:]
            annotation_text = ''.join(
                [before_part, grounding_text, after_part])
    return annotation_text
Пример #16
0
    def post(self):
        """Assemble each statement into an English sentence.

        Parameters
        ----------
        statements : list[indra.statements.Statement.to_json()]
            A list of INDRA Statements to assemble, read from the
            'statements' entry of the JSON request.

        Returns
        -------
        sentences : dict
            Dictionary mapping Statement UUIDs to English sentences,
            returned under the 'sentences' key.
        """
        payload = request.json
        stmts = stmts_from_json(payload.get('statements'))
        sentences = {}
        # Every statement gets its own assembler, so each sentence stands
        # alone
        for stmt in stmts:
            assembler = EnglishAssembler()
            assembler.add_statements([stmt])
            sentences[stmt.uuid] = assembler.make_model()
        return {'sentences': sentences}
Пример #17
0
def stmt_to_english(stmt):
    """Return the English assembly of a Statement minus its last character.

    The final character of the assembled text (presumably the closing
    period) is dropped unconditionally.
    """
    sentence = EnglishAssembler([stmt]).make_model()
    return sentence[:-1]
Пример #18
0
def run_assembly(stmts, folder, pmcid, background_assertions=None):
    '''Run assembly on a list of statements, for a given PMCID.

    The statements are grounding-mapped, filtered, preassembled, scored
    with the BeliefEngine, extended with MechLinker inferences and
    preassembled again. Index cards are then saved for the resulting
    top-level statements, and diagnostics are printed and written to files.

    Parameters
    ----------
    stmts : list
        Statements to assemble.
    folder : str
        Output folder. Index cards are written with the prefix
        ``<folder>/index_cards/<pmcid>`` and all other outputs with
        ``<folder>/other_outputs/<pmcid>``.
    pmcid : str
        PMCID used in the output file names and passed to the
        IndexCardAssembler as ``pmc_override``.
    background_assertions : list or None
        If given, top-level statements contained in it are left out of the
        pickled, flattened and graphed statements.
    '''
    # Folder for index card output (scored submission)
    indexcard_prefix = folder + '/index_cards/' + pmcid
    # Folder for other outputs (for analysis, debugging)
    otherout_prefix = folder + '/other_outputs/' + pmcid

    # Do grounding mapping here
    # Load the TRIPS-specific grounding map and add to the default
    # (REACH-oriented) grounding map:
    # NOTE(review): update() changes default_grounding_map in place, so the
    # TRIPS entries presumably remain in the shared map after this call --
    # confirm that this is intended.
    trips_gm = load_grounding_map('trips_grounding_map.csv')
    default_grounding_map.update(trips_gm)
    gm = GroundingMapper(default_grounding_map)

    mapped_agent_stmts = gm.map_agents(stmts)
    renamed_agent_stmts = gm.rename_agents(mapped_agent_stmts)

    # Filter for grounding: keep only statements whose agents all pass
    # is_protein_or_chemical
    grounded_stmts = []
    for st in renamed_agent_stmts:
        if all([is_protein_or_chemical(a) for a in st.agent_list()]):
            grounded_stmts.append(st)

    # Instantiate the Preassembler
    pa = Preassembler(hierarchies)
    pa.add_statements(grounded_stmts)
    print('== %s ====================' % pmcid)
    print('%d statements collected in total.' % len(pa.stmts))

    # Combine duplicates
    unique_stmts = pa.combine_duplicates()
    print('%d statements after combining duplicates.' % len(unique_stmts))

    # Run BeliefEngine on unique statements
    epe = BeliefEngine()
    epe.set_prior_probs(pa.unique_stmts)

    # Build statement hierarchy
    related_stmts = pa.combine_related()
    # Run BeliefEngine on hierarchy
    epe.set_hierarchy_probs(related_stmts)
    print('%d statements after combining related.' % len(related_stmts))

    # Link statements with the mechanism linker (the MechLinker methods are
    # called on the class, so no instance is created)
    linked_stmts = MechLinker.infer_active_forms(related_stmts)
    linked_stmts += MechLinker.infer_modifications(related_stmts)
    linked_stmts += MechLinker.infer_activations(related_stmts)
    # Run BeliefEngine on linked statements
    epe.set_linked_probs(linked_stmts)
    # Print linked statements for debugging purposes
    print('Linked\n=====')
    for ls in linked_stmts:
        print(ls.inferred_stmt.belief, ls.inferred_stmt)
    print('=============')

    # Combine all statements including linked ones
    all_statements = related_stmts + [ls.inferred_stmt for ls in linked_stmts]

    # Instantiate a new preassembler
    pa = Preassembler(hierarchies, all_statements)
    # Build hierarchy again
    pa.combine_duplicates()
    # Choose the top-level statements
    related_stmts = pa.combine_related()

    # Remove top-level statements that came only from the prior
    if background_assertions is not None:
        nonbg_stmts = [stmt for stmt in related_stmts
                       if stmt not in background_assertions]
    else:
        nonbg_stmts = related_stmts

    # Dump top-level statements in a pickle
    with open(otherout_prefix + '.pkl', 'wb') as fh:
        pickle.dump(nonbg_stmts, fh)

    # Flatten evidence for statements
    flattened_evidence_stmts = flatten_evidence(nonbg_stmts)

    # Start a card counter
    card_counter = 1
    # We don't limit the number of cards reported in this round
    card_lim = float('inf')
    top_stmts = []
    ###############################################
    # The belief cutoff for statements
    belief_cutoff = 0.3
    ###############################################
    # Go through the statements in order of decreasing belief
    for st in sorted(flattened_evidence_stmts,
                     key=lambda x: x.belief, reverse=True):
        # NOTE(review): statements below the cutoff are only printed with a
        # 'SKIP' label; there is no `continue`, so they still go through
        # index card assembly below -- confirm whether they were meant to
        # be skipped.
        if st.belief >= belief_cutoff:
            print(st.belief, st)
        if st.belief < belief_cutoff:
            print('SKIP', st.belief, st)

        # If it's background knowledge, we skip the statement
        if is_background_knowledge(st):
            print('This statement is background knowledge - skipping.')
            continue

        # Assemble IndexCards
        ia = IndexCardAssembler([st], pmc_override=pmcid)
        ia.make_model()
        # If the index card was actually made
        # (not all statements can be assembled into index cards so
        # this is often not the case)
        if ia.cards:
            # Save the index card json
            ia.save_model(indexcard_prefix + '-%d.json' % card_counter)
            card_counter += 1
            top_stmts.append(st)
            if card_counter > card_lim:
                break

    # Print the English-assembled model for debugging purposes
    # NOTE(review): encode() returns bytes, so under Python 3 this prints a
    # bytes literal (b'...') rather than the text -- confirm intended.
    ea = EnglishAssembler(top_stmts)
    print('=======================')
    print(ea.make_model().encode('utf-8'))
    print('=======================')

    # Draw the statement graph to a PDF
    graph = render_stmt_graph(nonbg_stmts)
    graph.draw(otherout_prefix + '_graph.pdf', prog='dot')
    # Write statement diagnostics
    print_stmts(pa.stmts, otherout_prefix + '_statements.tsv')
    print_stmts(related_stmts, otherout_prefix + '_related_statements.tsv')
Пример #19
0
def run_assembly(stmts, folder, pmcid, background_assertions=None):
    '''Run assembly on a list of statements, for a given PMCID.

    The statements are grounding-mapped, filtered, preassembled, scored
    with the BeliefEngine, extended with MechLinker inferences and
    preassembled again. Index cards are then saved for the resulting
    top-level statements, and diagnostics are printed and written to files.

    Parameters
    ----------
    stmts : list
        Statements to assemble.
    folder : str
        Output folder. Index cards are written with the prefix
        ``<folder>/index_cards/<pmcid>`` and all other outputs with
        ``<folder>/other_outputs/<pmcid>``.
    pmcid : str
        PMCID used in the output file names and passed to the
        IndexCardAssembler as ``pmc_override``.
    background_assertions : list or None
        If given, top-level statements contained in it are left out of the
        pickled, flattened and graphed statements.
    '''
    # Folder for index card output (scored submission)
    indexcard_prefix = folder + '/index_cards/' + pmcid
    # Folder for other outputs (for analysis, debugging)
    otherout_prefix = folder + '/other_outputs/' + pmcid

    # Do grounding mapping here
    # Load the TRIPS-specific grounding map and add to the default
    # (REACH-oriented) grounding map:
    # NOTE(review): update() changes default_grounding_map in place, so the
    # TRIPS entries presumably remain in the shared map after this call --
    # confirm that this is intended.
    trips_gm = load_grounding_map('trips_grounding_map.csv')
    default_grounding_map.update(trips_gm)
    gm = GroundingMapper(default_grounding_map)

    mapped_agent_stmts = gm.map_agents(stmts)
    renamed_agent_stmts = gm.rename_agents(mapped_agent_stmts)

    # Filter for grounding: keep only statements whose agents all pass
    # is_protein_or_chemical
    grounded_stmts = []
    for st in renamed_agent_stmts:
        if all([is_protein_or_chemical(a) for a in st.agent_list()]):
            grounded_stmts.append(st)

    # Instantiate the Preassembler
    pa = Preassembler(bio_ontology)
    pa.add_statements(grounded_stmts)
    print('== %s ====================' % pmcid)
    print('%d statements collected in total.' % len(pa.stmts))

    # Combine duplicates
    unique_stmts = pa.combine_duplicates()
    print('%d statements after combining duplicates.' % len(unique_stmts))

    # Run BeliefEngine on unique statements
    epe = BeliefEngine()
    epe.set_prior_probs(pa.unique_stmts)

    # Build statement hierarchy
    related_stmts = pa.combine_related()
    # Run BeliefEngine on hierarchy
    epe.set_hierarchy_probs(related_stmts)
    print('%d statements after combining related.' % len(related_stmts))

    # Link statements with the mechanism linker (the MechLinker methods are
    # called on the class, so no instance is created)
    linked_stmts = MechLinker.infer_active_forms(related_stmts)
    linked_stmts += MechLinker.infer_modifications(related_stmts)
    linked_stmts += MechLinker.infer_activations(related_stmts)
    # Run BeliefEngine on linked statements
    epe.set_linked_probs(linked_stmts)
    # Print linked statements for debugging purposes
    print('Linked\n=====')
    for ls in linked_stmts:
        print(ls.inferred_stmt.belief, ls.inferred_stmt)
    print('=============')

    # Combine all statements including linked ones
    all_statements = related_stmts + [ls.inferred_stmt for ls in linked_stmts]

    # Instantiate a new preassembler
    pa = Preassembler(bio_ontology, all_statements)
    # Build hierarchy again
    pa.combine_duplicates()
    # Choose the top-level statements
    related_stmts = pa.combine_related()

    # Remove top-level statements that came only from the prior
    if background_assertions is not None:
        nonbg_stmts = [
            stmt for stmt in related_stmts if stmt not in background_assertions
        ]
    else:
        nonbg_stmts = related_stmts

    # Dump top-level statements in a pickle
    with open(otherout_prefix + '.pkl', 'wb') as fh:
        pickle.dump(nonbg_stmts, fh)

    # Flatten evidence for statements
    flattened_evidence_stmts = flatten_evidence(nonbg_stmts)

    # Start a card counter
    card_counter = 1
    # We don't limit the number of cards reported in this round
    card_lim = float('inf')
    top_stmts = []
    ###############################################
    # The belief cutoff for statements
    belief_cutoff = 0.3
    ###############################################
    # Go through the statements in order of decreasing belief
    for st in sorted(flattened_evidence_stmts,
                     key=lambda x: x.belief,
                     reverse=True):
        # NOTE(review): statements below the cutoff are only printed with a
        # 'SKIP' label; there is no `continue`, so they still go through
        # index card assembly below -- confirm whether they were meant to
        # be skipped.
        if st.belief >= belief_cutoff:
            print(st.belief, st)
        if st.belief < belief_cutoff:
            print('SKIP', st.belief, st)

        # If it's background knowledge, we skip the statement
        if is_background_knowledge(st):
            print('This statement is background knowledge - skipping.')
            continue

        # Assemble IndexCards
        ia = IndexCardAssembler([st], pmc_override=pmcid)
        ia.make_model()
        # If the index card was actually made
        # (not all statements can be assembled into index cards so
        # this is often not the case)
        if ia.cards:
            # Save the index card json
            ia.save_model(indexcard_prefix + '-%d.json' % card_counter)
            card_counter += 1
            top_stmts.append(st)
            if card_counter > card_lim:
                break

    # Print the English-assembled model for debugging purposes
    # NOTE(review): encode() returns bytes, so under Python 3 this prints a
    # bytes literal (b'...') rather than the text -- confirm intended.
    ea = EnglishAssembler(top_stmts)
    print('=======================')
    print(ea.make_model().encode('utf-8'))
    print('=======================')

    # Draw the statement graph to a PDF
    graph = render_stmt_graph(nonbg_stmts)
    graph.draw(otherout_prefix + '_graph.pdf', prog='dot')
    # Write statement diagnostics
    print_stmts(pa.stmts, otherout_prefix + '_statements.tsv')
    print_stmts(related_stmts, otherout_prefix + '_related_statements.tsv')
Пример #20
0
def path_to_english(path, model, stmts):
    """Return the English assembly of the Statements along a path.

    The Statements are obtained from ``stmts_from_path`` and assembled
    together by a single EnglishAssembler.
    """
    statements_on_path = stmts_from_path(path, model, stmts)
    return EnglishAssembler(statements_on_path).make_model()
Пример #21
0
def stmt_to_english(stmt):
    """Assemble a Statement into English and drop the final character.

    The last character of the assembled text (presumably the sentence's
    period) is always removed.
    """
    assembler = EnglishAssembler([stmt])
    text = assembler.make_model()
    return text[:-1]
Пример #22
0
# Pair the Statement behind each entry of dist_filt with the entry's second
# element (assumes entries are (rule name, frequency) -- TODO confirm)
stmt_freq = [(_stmt_from_rule(model, r[0], statements), r[1])
             for r in dist_filt]
# Add up the frequencies of entries that map to the same Statement UUID
combined_freq = {}
for stmt, freq in stmt_freq:
    if stmt.uuid not in combined_freq:
        combined_freq[stmt.uuid] = (stmt, freq)
    else:
        _, old_freq = combined_freq[stmt.uuid]
        combined_freq[stmt.uuid] = (stmt, freq + old_freq)
# Order the (Statement, frequency) pairs by decreasing frequency
top_stmts = list(combined_freq.values())
top_stmts.sort(key=lambda x: x[1], reverse=True)

# Replace each Statement by its English assembly
desc = []
for s, freq in top_stmts:
    ea = EnglishAssembler([s])
    text = ea.make_model()
    desc.append((text, freq))
# Print the first 30 entries as "text,frequency" lines
for t, f in desc[:30]:
    print('%s,%s' % (t, f))
"""
str_names, freqs = zip(*dist_filt)
num_genes = 30
plt.ion()
plt.figure(figsize=(5,2), dpi=150)
ypos = np.array(range(num_genes)) * 1.0
plt.bar(ypos, freqs[:num_genes], align='center')
plt.xticks(ypos, str_names[:num_genes], rotation='vertical')
ax = plt.gca()
plt.ylabel('Frequency')
plt.subplots_adjust(bottom=0.3)
pf.format_axis(ax)
Пример #23
0
    sample_rows = []
    max_sample_size = 20
    for rule, freq in frequencies:
        stmts = stmts_by_rule[rule]
        if max_sample_size < len(stmts):
            sample_stmts = np.random.choice(stmts,
                                            max_sample_size,
                                            replace=False)
        else:
            sample_stmts = stmts
        for stmt in sample_stmts:
            for ag in stmt.agent_list():
                if ag is not None:
                    ag.name = ag.db_refs.get('TEXT')
            is_hypothesis = stmt.evidence[0].epistemics.get('hypothesis', '')
            is_direct = stmt.evidence[0].epistemics.get('direct', '')
            # Get the English assembly of the statement
            eng = EnglishAssembler([stmt])
            eng_sentence = eng.make_model()
            if eng_sentence == '':
                eng_sentence = str(stmt)
            sample_rows.append([
                eng_sentence, is_hypothesis, '', '', '', stmt.evidence[0].pmid,
                stmt.evidence[0].text, rule, freq, stmt, is_direct
            ])

    write_unicode_csv('stmts_by_rule_to_curate.tsv',
                      sample_rows,
                      delimiter='\t')
    write_unicode_csv('reach_rule_frequencies.tsv', frequencies,
                      delimiter='\t')

    sample_rows = []
    max_sample_size = 20
    for rule, freq in frequencies:
        stmts = stmts_by_rule[rule]
        if max_sample_size < len(stmts):
            sample_stmts = np.random.choice(stmts,
                                            max_sample_size, replace=False)
        else:
            sample_stmts = stmts
        for stmt in sample_stmts:
            for ag in stmt.agent_list():
                if ag is not None:
                    ag.name = ag.db_refs.get('TEXT')
            is_hypothesis = stmt.evidence[0].epistemics.get('hypothesis', '')
            is_direct = stmt.evidence[0].epistemics.get('direct', '')
            # Get the English assembly of the statement
            eng = EnglishAssembler([stmt])
            eng_sentence = eng.make_model()
            if eng_sentence == '':
                eng_sentence = str(stmt)
            sample_rows.append([eng_sentence, is_hypothesis, '', '', '',
                                stmt.evidence[0].pmid,
                                stmt.evidence[0].text, rule, freq, stmt,
                                is_direct])

    write_unicode_csv('stmts_by_rule_to_curate.tsv', sample_rows,
                      delimiter='\t')
Пример #25
0
def render_stmt_graph(statements, reduce=True, english=False, rankdir=None,
                      agent_style=None):
    """Render the statement hierarchy as a pygraphviz graph.

    Parameters
    ----------
    statements : list of :py:class:`indra.statements.Statement`
        A list of top-level statements with associated supporting statements
        resulting from building a statement hierarchy with
        :py:meth:`combine_related`.
    reduce : bool
        Whether to perform a transitive reduction of the edges in the graph.
        Default is True.
    english : bool
        If True, the statements in the graph are represented by their
        English-assembled equivalent; otherwise they are represented as
        text-formatted Statements.
    rankdir : str or None
        Argument to pass through to the  pygraphviz `AGraph` constructor
        specifying graph layout direction. In particular, a value of 'LR'
        specifies a left-to-right direction. If None, the pygraphviz default
        is used.
    agent_style : dict or None
        Dict of attributes specifying the visual properties of nodes. If None,
        the following default attributes are used::

            agent_style = {'color': 'lightgray', 'style': 'filled',
                           'fontname': 'arial'}

    Returns
    -------
    pygraphviz.AGraph or None
        Pygraphviz graph with nodes representing statements and edges pointing
        from supported statements to supported_by statements. Statements
        without any support relation appear as isolated nodes. None is
        returned if pygraphviz could not be imported.

    Examples
    --------
    Pattern for getting statements and rendering as a Graphviz graph:

    >>> from indra.preassembler.hierarchy_manager import hierarchies
    >>> braf = Agent('BRAF')
    >>> map2k1 = Agent('MAP2K1')
    >>> st1 = Phosphorylation(braf, map2k1)
    >>> st2 = Phosphorylation(braf, map2k1, residue='S')
    >>> pa = Preassembler(hierarchies, [st1, st2])
    >>> pa.combine_related() # doctest:+ELLIPSIS
    [Phosphorylation(BRAF(), MAP2K1(), S)]
    >>> graph = render_stmt_graph(pa.related_stmts)
    >>> graph.write('example_graph.dot') # To make the DOT file
    >>> graph.draw('example_graph.png', prog='dot') # To make an image

    Resulting graph:

    .. image:: /images/example_graph.png
        :align: center
        :alt: Example statement graph rendered by Graphviz

    """
    from indra.assemblers.english import EnglishAssembler
    # Set the default agent formatting properties
    if agent_style is None:
        agent_style = {'color': 'lightgray', 'style': 'filled',
                       'fontname': 'arial'}
    # Sets to store all of the nodes and edges as we recursively process all
    # of the statements
    nodes = set()
    edges = set()
    stmt_dict = {}

    # Recursive function for processing all statements
    def process_stmt(stmt):
        key = str(stmt.matches_key())
        nodes.add(key)
        stmt_dict[key] = stmt
        for sby_stmt in stmt.supported_by:
            edges.add((key, str(sby_stmt.matches_key())))
            process_stmt(sby_stmt)

    # Process all of the top-level statements, getting the supporting statements
    # recursively
    for stmt in statements:
        process_stmt(stmt)
    # Create a networkx graph from the nodes and edges. The nodes are added
    # explicitly so that statements without any support relation are kept;
    # adding only the edges silently left them out of the graph.
    nx_graph = nx.DiGraph()
    nx_graph.add_nodes_from(nodes)
    nx_graph.add_edges_from(edges)
    # Perform transitive reduction if desired
    if reduce:
        nx_graph = nx.algorithms.dag.transitive_reduction(nx_graph)
    # Create a pygraphviz graph from the nx graph
    try:
        pgv_graph = pgv.AGraph(name='statements', directed=True,
                               rankdir=rankdir)
    except NameError:
        # The name pgv is undefined if importing pygraphviz failed
        logger.error('Cannot generate graph because '
                     'pygraphviz could not be imported.')
        return None
    for node in nx_graph.nodes():
        stmt = stmt_dict[node]
        if english:
            stmt_str = EnglishAssembler([stmt]).make_model()
        else:
            stmt_str = str(stmt)
        # Label each node with the statement and its evidence count
        pgv_graph.add_node(node,
                           label='%s (%d)' % (stmt_str, len(stmt.evidence)),
                           **agent_style)
    pgv_graph.add_edges_from(nx_graph.edges())
    return pgv_graph
Пример #26
0
def render_stmt_graph(statements,
                      reduce=True,
                      english=False,
                      rankdir=None,
                      agent_style=None):
    """Render the statement hierarchy as a pygraphviz graph.

    Nodes are keyed by the string form of each statement's ``matches_key()``
    and edges point from a statement to each statement in its
    ``supported_by`` list. The graph is built from these edges only, so a
    statement that neither supports nor is supported by another statement
    does not appear in the output.

    Parameters
    ----------
    statements : list of :py:class:`indra.statements.Statement`
        A list of top-level statements with associated supporting statements
        resulting from building a statement hierarchy with
        :py:meth:`combine_related`.
    reduce : bool
        Whether to perform a transitive reduction of the edges in the graph.
        Default is True.
    english : bool
        If True, the statements in the graph are represented by their
        English-assembled equivalent; otherwise they are represented as
        text-formatted Statements.
    rankdir : str or None
        Argument to pass through to the  pygraphviz `AGraph` constructor
        specifying graph layout direction. In particular, a value of 'LR'
        specifies a left-to-right direction. If None, the pygraphviz default
        is used.
    agent_style : dict or None
        Dict of attributes specifying the visual properties of nodes. If None,
        the following default attributes are used::

            agent_style = {'color': 'lightgray', 'style': 'filled',
                           'fontname': 'arial'}

    Returns
    -------
    pygraphviz.AGraph or None
        Pygraphviz graph with nodes representing statements and edges pointing
        from supported statements to supported_by statements. None is
        returned (and an error is logged) if pygraphviz cannot be imported.

    Examples
    --------
    Pattern for getting statements and rendering as a Graphviz graph:

    >>> from indra.ontology.bio import bio_ontology
    >>> braf = Agent('BRAF')
    >>> map2k1 = Agent('MAP2K1')
    >>> st1 = Phosphorylation(braf, map2k1)
    >>> st2 = Phosphorylation(braf, map2k1, residue='S')
    >>> pa = Preassembler(bio_ontology, [st1, st2])
    >>> pa.combine_related() # doctest:+ELLIPSIS
    [Phosphorylation(BRAF(), MAP2K1(), S)]
    >>> graph = render_stmt_graph(pa.related_stmts)
    >>> graph.write('example_graph.dot') # To make the DOT file
    >>> graph.draw('example_graph.png', prog='dot') # To make an image

    Resulting graph:

    .. image:: /images/example_graph.png
        :align: center
        :alt: Example statement graph rendered by Graphviz

    """
    # The import is local to this function, so a missing pygraphviz surfaces
    # as ImportError right here (not as a NameError when `pgv` is first used).
    # Catch it at the import so the logged-error / return-None path is
    # actually reachable.
    try:
        import pygraphviz as pgv
    except ImportError:
        logger.error('Cannot generate graph because '
                     'pygraphviz could not be imported.')
        return None
    from indra.assemblers.english import EnglishAssembler
    # Set the default agent formatting properties
    if agent_style is None:
        agent_style = {
            'color': 'lightgray',
            'style': 'filled',
            'fontname': 'arial'
        }
    # Edges collected while recursively walking the statements, plus a lookup
    # from node key back to the statement object it represents
    edges = set()
    stmt_dict = {}

    # Recursive function for processing all statements
    def process_stmt(stmt):
        stmt_key = str(stmt.matches_key())
        stmt_dict[stmt_key] = stmt
        for sby_stmt in stmt.supported_by:
            edges.add((stmt_key, str(sby_stmt.matches_key())))
            process_stmt(sby_stmt)

    # Process all of the top-level statements, getting the supporting statements
    # recursively
    for stmt in statements:
        process_stmt(stmt)
    # Create a networkx graph from the collected support edges
    nx_graph = nx.DiGraph()
    nx_graph.add_edges_from(edges)
    # Perform transitive reduction if desired
    if reduce:
        nx_graph = nx.algorithms.dag.transitive_reduction(nx_graph)
    # Create a pygraphviz graph from the nx graph
    pgv_graph = pgv.AGraph(name='statements',
                           directed=True,
                           rankdir=rankdir)
    for node in nx_graph.nodes():
        stmt = stmt_dict[node]
        if english:
            stmt_str = EnglishAssembler([stmt]).make_model()
        else:
            stmt_str = str(stmt)
        # Label each node with the statement text and its evidence count
        pgv_graph.add_node(node,
                           label='%s (%d)' % (stmt_str, len(stmt.evidence)),
                           **agent_style)
    pgv_graph.add_edges_from(nx_graph.edges())
    return pgv_graph