def test_reground_texts():
    """Reground two sample texts and check the first grounding of each."""
    reader = EidosReader()
    reader.initialize_reader()
    groundings = reader.reground_texts(['rainfall', 'hunger'])
    # Expected value of groundings[i][0][0] for each input text, in order
    expected_first = [
        'wm/concept/causal_factor/environmental/meteorologic/'
        'precipitation/rainfall',
        'wm/concept/causal_factor/condition/famine',
    ]
    for position, expected in enumerate(expected_first):
        assert groundings[position][0][0] == expected, groundings
def reground_stmts(stmts):
    """Re-ground the agents of the given Statements using Eidos.

    The 'TEXT' entry of every agent is sent to Eidos together with a YAML
    dump of the UN ontology, and the result obtained for each agent is
    stored under the 'UN' key of its db_refs.

    Parameters
    ----------
    stmts : list
        The Statements whose agents are regrounded in place.

    Returns
    -------
    list
        The same Statements object that was passed in.
    """
    ont_manager = _make_un_ontology()
    reader = EidosReader()
    # Serialize the latest ontology and gather one text per agent
    ontology_yaml = yaml.dump(ont_manager.yaml_root)
    texts = [agent.db_refs.get('TEXT')
             for stmt in stmts
             for agent in stmt.agent_list()]
    new_groundings = reader.reground_texts(texts, ontology_yaml)
    # Visit the agents in the same order again so that positions line up
    # with the list of texts that was submitted
    agents = (agent for stmt in stmts for agent in stmt.agent_list())
    for position, agent in enumerate(agents):
        agent.db_refs['UN'] = new_groundings[position]
    return stmts
def __init__(self, scorer=None, corpora=None):
    """Initialize the curator with a scorer and a set of corpora.

    Parameters
    ----------
    scorer : indra.belief.BeliefScorer
        A scorer object to use for the curation. If None, the scorer
        returned by get_eidos_bayesian_scorer() is used.
    corpora : dict[str, Corpus]
        A dictionary mapping corpus IDs to Corpus objects. If None, a new
        empty dictionary is created.
    """
    # Compare against None instead of relying on truthiness: an empty dict
    # supplied by the caller has to be kept (not replaced by a fresh one)
    # so that corpora added to it later are visible to the curator.
    self.corpora = corpora if corpora is not None else {}
    self.scorer = scorer if scorer is not None \
        else get_eidos_bayesian_scorer()
    self.ont_manager = _make_un_ontology()
    self.eidos_reader = EidosReader()
class LiveCurator(object):
    """Class coordinating the real-time curation of a corpus of Statements.

    Parameters
    ----------
    scorer : indra.belief.BeliefScorer
        A scorer object to use for the curation
    corpora : dict[str, Corpus]
        A dictionary mapping corpus IDs to Corpus objects.
    """
    def __init__(self, scorer=None, corpora=None):
        # Compare against None instead of relying on truthiness: an empty
        # dict supplied by the caller has to be kept (not replaced by a
        # fresh one) so that corpora added to it later are visible here.
        self.corpora = corpora if corpora is not None else {}
        self.scorer = scorer if scorer is not None \
            else get_eidos_bayesian_scorer()
        self.ont_manager = _make_un_ontology()
        self.eidos_reader = EidosReader()

    # TODO: generalize this to other kinds of scorers
    def reset_scorer(self):
        """Reset the scorer used for curation.

        The curations stored on every corpus are cleared as well.
        """
        self.scorer = get_eidos_bayesian_scorer()
        for corpus in self.corpora.values():
            corpus.curations = {}

    def get_corpus(self, corpus_id):
        """Return a corpus given an ID.

        If the corpus ID cannot be found, an InvalidCorpusError is raised.

        Parameters
        ----------
        corpus_id : str
            The ID of the corpus to return.

        Returns
        -------
        Corpus
            The corpus with the given ID.
        """
        try:
            return self.corpora[corpus_id]
        except KeyError:
            raise InvalidCorpusError

    def submit_curation(self, corpus_id, curations):
        """Submit correct/incorrect curations for a given corpus.

        Parameters
        ----------
        corpus_id : str
            The ID of the corpus to which the curations apply.
        curations : dict
            A dict of curations with keys corresponding to Statement UUIDs
            and values corresponding to correct/incorrect feedback.
        """
        corpus = self.get_corpus(corpus_id)
        # Start tabulating the curation counts
        prior_counts = {}
        subtype_counts = {}
        # Take each curation from the input
        for uuid, correct in curations.items():
            # Save the curation in the corpus
            # TODO: handle already existing curation
            stmt = corpus.statements.get(uuid)
            if stmt is None:
                logger.warning('%s is not in the corpus.', uuid)
                continue
            corpus.curations[uuid] = correct
            # The index in the [correct, incorrect] count list only
            # depends on the curation, so compute it once per Statement
            idx = 0 if correct else 1
            # Now take all the evidences of the statement and assume that
            # they follow the correctness of the curation and contribute to
            # counts for their sources
            for ev in stmt.evidence:
                extraction_rule = ev.annotations.get('found_by')
                if not extraction_rule:
                    # If there is no extraction rule then we just score
                    # the source
                    counts = prior_counts.setdefault(ev.source_api, [0, 0])
                else:
                    # Otherwise we score the specific extraction rule
                    rule_counts = subtype_counts.setdefault(ev.source_api,
                                                            {})
                    counts = rule_counts.setdefault(extraction_rule, [0, 0])
                counts[idx] += 1
        # Finally, we update the scorer with the new curation counts
        self.scorer.update_counts(prior_counts, subtype_counts)

    def update_beliefs(self, corpus_id):
        """Return updated belief scores for a given corpus.

        Parameters
        ----------
        corpus_id : str
            The ID of the corpus for which beliefs are to be updated.

        Returns
        -------
        dict
            A dictionary of belief scores with keys corresponding to
            Statement UUIDs and values to new belief scores.
        """
        corpus = self.get_corpus(corpus_id)
        be = BeliefEngine(self.scorer)
        stmts = list(corpus.statements.values())
        be.set_prior_probs(stmts)
        # Here we set beliefs based on actual curation
        for uuid, correct in corpus.curations.items():
            stmt = corpus.statements.get(uuid)
            if stmt is None:
                logger.warning('%s is not in the corpus.', uuid)
                continue
            stmt.belief = correct
        return {st.uuid: st.belief for st in stmts}

    def update_groundings(self, corpus_id):
        """Re-ground the raw Statements of a corpus and reassemble them.

        The 'TEXT' entry of every agent of the raw Statements is sent to
        Eidos together with a YAML dump of the curator's ontology. The
        result for each agent is stored under the 'UN' key of its db_refs,
        after which the raw Statements are run through default_assembly
        and the corpus' statements are replaced by the assembled ones.

        Parameters
        ----------
        corpus_id : str
            The ID of the corpus whose groundings are to be updated.

        Returns
        -------
        list
            The Statements returned by default_assembly.
        """
        corpus = self.get_corpus(corpus_id)
        # Send the latest ontology and list of concept texts to Eidos
        yaml_str = yaml.dump(self.ont_manager.yaml_root)
        concepts = [concept for stmt in corpus.raw_statements
                    for concept in stmt.agent_list()]
        concept_texts = [concept.db_refs.get('TEXT') for concept in concepts]
        groundings = self.eidos_reader.reground_texts(concept_texts,
                                                      yaml_str)
        # Update the corpus with new groundings; index-based access is used
        # on purpose so that a too-short result fails loudly
        for idx, concept in enumerate(concepts):
            concept.db_refs['UN'] = groundings[idx]
        assembled_statements = default_assembly(corpus.raw_statements)
        corpus.statements = {s.uuid: s for s in assembled_statements}
        return assembled_statements
@app.route('/process_text', methods=['POST'])
def process_text():
    """Read the text given in the POSTed JSON with the Eidos reader.

    The request JSON is expected to have a 'text' entry. The reader output
    is returned serialized as a JSON string; if no text is given, an empty
    JSON object is returned.
    """
    text = request.json.get('text')
    if not text:
        # Serialize explicitly so that both exits of the view return the
        # same kind of response (a JSON string), independently of which
        # return types the installed Flask version accepts from a view.
        return json.dumps({})
    # er is the module-level reader created in the __main__ block below
    res = er.process_text(text)
    return json.dumps(res)


@app.route('/reground', methods=['POST'])
def reground():
    """Re-ground one or more texts given in the POSTed JSON.

    The request JSON is read for the following entries: 'text' (a string
    or a list of strings), 'ont_yml' (defaults to wm_yml), 'topk'
    (defaults to 10) and 'is_canonicalized' (defaults to False). The
    groundings are returned serialized as a JSON string; if no text is
    given, an empty JSON list is returned.
    """
    text = request.json.get('text')
    ont_yml = request.json.get('ont_yml', wm_yml)
    topk = request.json.get('topk', 10)
    is_canonicalized = request.json.get('is_canonicalized', False)
    if not text:
        # A bare list is not a valid view return value in Flask versions
        # before 2.2, so return the serialized form like the normal exit.
        return json.dumps([])
    # A single string is treated as a list with one text
    if isinstance(text, str):
        text = [text]
    res = er.reground_texts(text, ont_yml, topk=topk,
                            is_canonicalized=is_canonicalized)
    return json.dumps(res)


if __name__ == '__main__':
    # The port can be given as the first command line argument
    port = int(sys.argv[1]) if len(sys.argv) > 1 else 6666
    er = EidosReader()
    er.process_text('hello')  # This is done to initialize the system
    app.run(host='0.0.0.0', port=port)
from .process import reground_stmts, remove_raw_grounding


# Ontology configurations to process, keyed by a short name. Note that
# 'main' and 'no_regrounding' point to the same ontology file; they differ
# only in whether regrounding is applied in the loop below.
onts = {
    'flattened_interventions':
        ('https://raw.githubusercontent.com/WorldModelers/Ontologies/master/'
         'wm_with_flattened_interventions_metadata.yml'),
    'main':
        ('https://raw.githubusercontent.com/WorldModelers/Ontologies/master/'
         'wm_metadata.yml'),
    'no_regrounding':
        ('https://raw.githubusercontent.com/WorldModelers/Ontologies/master/'
         'wm_metadata.yml'),
}


if __name__ == '__main__':
    eidos_reader = EidosReader()
    for key, ont_url in onts.items():
        # The raw statements are loaded from the pickle again on every
        # iteration (presumably so that each configuration starts from
        # statements not modified by a previous one -- TODO confirm)
        with open('eidos_raw.pkl', 'rb') as fh:
            stmts = pickle.load(fh)
        #stmts = load_eidos()
        #stmts = ac.filter_by_type(stmts, Influence)
        #remove_namespaces(stmts, ['WHO', 'MITRE12', 'UN', 'PROPS',
        #                          'INTERVENTIONS'])
        ont = load_world_ontology(ont_url)
        # Every configuration except 'no_regrounding' is regrounded
        # against the ontology that was just loaded
        if key != 'no_regrounding':
            stmts = reground_stmts(stmts, ont, 'WM', None, True)
        scorer = get_eidos_scorer()
        matches_fun, refinement_fun = None, None
import json
from indra.sources.eidos.reader import EidosReader
from indra.sources import hume
from indra.statements import stmts_to_json_file


def load_config():
    """Return the parsed content of the config.json file."""
    with open('config.json', 'r') as config_file:
        return json.load(config_file)


er = EidosReader()


def do_regrounding(stmts):
    """Re-ground the agents of the given Statements with the Eidos reader.

    The 'TEXT' entry of every agent is submitted for regrounding and the
    result obtained for each agent is stored under the 'UN' key of its
    db_refs.

    Parameters
    ----------
    stmts : list
        The Statements whose agents are regrounded in place.

    Returns
    -------
    list
        The same Statements object that was passed in.
    """
    texts = [agent.db_refs.get('TEXT')
             for stmt in stmts
             for agent in stmt.agent_list()]
    new_groundings = er.reground_texts(texts)
    # Visit the agents in the same order again so that positions line up
    # with the list of texts that was submitted
    agents = (agent for stmt in stmts for agent in stmt.agent_list())
    for position, agent in enumerate(agents):
        agent.db_refs['UN'] = new_groundings[position]
    return stmts
read with Eidos. To run the server, do python -m indra.sources.eidos.server and then submit POST requests to the `localhost:5000/process_text` endpoint with JSON content as `{'text': 'text to read'}`. The response will be the Eidos JSON-LD output. """ import json from flask import Flask, request from indra.sources.eidos.reader import EidosReader app = Flask(__name__) @app.route('/process_text', methods=['POST']) def process_text(): text = request.json.get('text') if not text: return {} res = er.process_text(text, 'json_ld') return json.dumps(res) if __name__ == '__main__': er = EidosReader() er.process_text('hello', 'json_ld') app.run(host='0.0.0.0')