def _read(self, content_iter, verbose=False, log=False, n_per_proc=None):
    """Read every item of content with a TRIPS service started on demand.

    TRIPS is started when the first content item is seen, together with a
    thread running ``self._monitor_trips_service``. Each item's text is
    cleaned up, sent to the service, and the resulting XML (or None when
    the service returned nothing) is stored with ``self.add_result``.

    Parameters
    ----------
    content_iter : iterable
        Items exposing ``get_text()`` and ``get_id()``.
    verbose : Optional[bool]
        Not used by this implementation.
    log : Optional[bool]
        Not used by this implementation.
    n_per_proc : Optional[int]
        Not used by this implementation.

    Returns
    -------
    The value of ``self.results`` after all content has been handled.
    """
    # NOTE(review): `service_endpoint` is not defined anywhere in this
    # method; it is presumably a module-level name -- confirm.
    th = None
    trips_started = False
    try:
        # Process all the content.
        for content in content_iter:
            if not trips_started:
                # Start TRIPS lazily so nothing is launched for empty input.
                p, service_host = _start_trips()

                # Set up the trips monitor
                th = threading.Thread(target=self._monitor_trips_service,
                                      args=[p])
                th.start()
                trips_started = True

            if not self.running:
                logger.error("Breaking loop: trips is down.")
                break

            # Clean up the text string a bit.
            #  - remove all excess white space.
            #  - remove special greek letters
            #  - remove all special unicode, replace with ascii
            raw_text = content.get_text()
            # Raw string: '\s' in a plain literal is an invalid escape.
            raw_text = re.sub(r'\s+', ' ', raw_text)
            for greek_letter, spelled_letter in greek_alphabet.items():
                raw_text = raw_text.replace(greek_letter, spelled_letter)
            text = unidecode(raw_text)

            # Process the text
            html = client.send_query(text, service_host=service_host,
                                     service_endpoint=service_endpoint)
            xml = client.get_xml(html) if html else None
            self.add_result(content.get_id(), xml)
    finally:
        # Always shut down, even if reading failed part-way, so that the
        # TRIPS process and the monitor thread are not left behind.
        # Stop TRIPS if it hasn't stopped already.
        logger.info("Killing TRIPS")
        _kill_trips()  # Kill all instances of trips-drum.
        logger.info("Signalling observation loop to stop.")
        self.stopping = True  # Sends signal to the loop to stop
        if th is not None:
            logger.info("Waiting for observation loop thread to join.")
            th.join(timeout=5)
            if th.is_alive():
                logger.warning("Thread did not end, TRIPS is still running.")
    return self.results
def get_example_extractions(fname):
    """Get extractions from one of the examples in `cag_examples`.

    Parameters
    ----------
    fname : str
        Path of a text file holding one sentence per line.

    Returns
    -------
    dict
        Maps each sentence to the value ``tc.get_xml`` returned for it.
        Sentences for which ``tc.get_xml`` raised an AssertionError are
        logged and left out of the result.
    """
    with open(fname, 'r') as handle:
        lines = handle.read().splitlines()

    extractions = {}
    for line in lines:
        logger.info('Reading "%s"...' % line)
        response = tc.send_query(line, 'cwms')
        try:
            rdf_xml = tc.get_xml(response, 'rdf:RDF', fail_if_empty=True)
        except AssertionError as err:
            # Report the failure and carry on with the next sentence.
            logger.error("Got error for %s." % line)
            logger.exception(err)
        else:
            extractions[line] = rdf_xml
    return extractions
def process_text(text, save_xml_name='trips_output.xml', save_xml_pretty=True,
                 offline=False, service_endpoint='drum', service_host=None):
    """Return a TripsProcessor by processing text.

    Parameters
    ----------
    text : str
        The text to be processed.
    save_xml_name : Optional[str]
        The name of the file to save the returned TRIPS extraction knowledge
        base XML. If empty or None, the XML is not saved.
        Default: trips_output.xml
    save_xml_pretty : Optional[bool]
        If True, the saved XML is pretty-printed. Some third-party tools
        require non-pretty-printed XMLs which can be obtained by setting this
        to False. Default: True
    offline : Optional[bool]
        If True, offline reading is used with a local instance of DRUM, if
        available. Default: False
    service_endpoint : Optional[str]
        Selects the TRIPS/DRUM web service endpoint to use. Is a choice between
        "drum" (default) and "drum-dev", a nightly build.
    service_host : Optional[str]
        Address of a service host different from the public IHMC server
        (e.g., a locally running service).

    Returns
    -------
    tp : TripsProcessor
        A TripsProcessor containing the extracted INDRA Statements
        in tp.statements. None is returned if offline reading is not
        available, the DrumReader could not be set up, or the offline reader
        finished without producing any extraction.
    """
    if not offline:
        html = client.send_query(text, service_endpoint=service_endpoint,
                                 service_host=service_host)
        xml = client.get_xml(html)
    elif not offline_reading:
        logger.error('Offline reading with TRIPS/DRUM not available.')
        logger.error('Error message was: %s' % offline_err)
        msg = (' To install DRUM locally, follow instructions at'
               ' https://github.com/wdebeaum/drum.'
               ' Next, install the pykqml package either from pip or from'
               ' https://github.com/bgyori/pykqml.'
               ' Once installed, run drum/bin/trips-drum in a separate'
               ' process. \n')
        logger.error(msg)
        return None
    else:
        try:
            dr = DrumReader()
            if dr is None:
                raise Exception('DrumReader could not be instantiated.')
        # BaseException is kept on purpose: presumably the reader can raise
        # SystemExit during set-up, as it can while reading -- TODO confirm.
        except BaseException as e:
            logger.error(e)
            logger.error('Make sure drum/bin/trips-drum is running in'
                         ' a separate process')
            return None
        try:
            dr.read_text(text)
            dr.start()
        except SystemExit:
            pass
        # The reader may stop without having produced anything; indexing
        # blindly would raise IndexError instead of failing gracefully.
        if not dr.extractions:
            logger.error('No extractions were returned by the DrumReader.')
            return None
        xml = dr.extractions[0]

    if save_xml_name:
        client.save_xml(xml, save_xml_name, save_xml_pretty)
    return process_xml(xml)
def process_text(text, save_xml_name='trips_output.xml', save_xml_pretty=True,
                 offline=False, service_endpoint='drum'):
    """Return a TripsProcessor by processing text.

    Parameters
    ----------
    text : str
        The text to be processed.
    save_xml_name : Optional[str]
        The name of the file to save the returned TRIPS extraction knowledge
        base XML. If empty or None, the XML is not saved.
        Default: trips_output.xml
    save_xml_pretty : Optional[bool]
        If True, the saved XML is pretty-printed. Some third-party tools
        require non-pretty-printed XMLs which can be obtained by setting this
        to False. Default: True
    offline : Optional[bool]
        If True, offline reading is used with a local instance of DRUM, if
        available. Default: False
    service_endpoint : Optional[str]
        Selects the TRIPS/DRUM web service endpoint to use. Is a choice between
        "drum" (default) and "drum-dev", a nightly build.

    Returns
    -------
    tp : TripsProcessor
        A TripsProcessor containing the extracted INDRA Statements
        in tp.statements. None is returned if offline reading is not
        available, the DrumReader could not be set up, or the offline reader
        finished without producing any extraction.
    """
    if not offline:
        html = client.send_query(text, service_endpoint)
        xml = client.get_xml(html)
    elif not offline_reading:
        logger.error('Offline reading with TRIPS/DRUM not available.')
        logger.error('Error message was: %s' % offline_err)
        msg = (' To install DRUM locally, follow instructions at'
               ' https://github.com/wdebeaum/drum.'
               ' Next, install the pykqml package either from pip or from'
               ' https://github.com/bgyori/pykqml.'
               ' Once installed, run drum/bin/trips-drum in a separate'
               ' process. ')
        logger.error(msg)
        return None
    else:
        try:
            dr = DrumReader()
            if dr is None:
                raise Exception('DrumReader could not be instantiated.')
        # BaseException is kept on purpose: presumably the reader can raise
        # SystemExit during set-up, as it can while reading -- TODO confirm.
        except BaseException as e:
            logger.error(e)
            logger.error('Make sure drum/bin/trips-drum is running in'
                         ' a separate process')
            return None
        try:
            dr.read_text(text)
            dr.start()
        except SystemExit:
            pass
        # The reader may stop without having produced anything; indexing
        # blindly would raise IndexError instead of failing gracefully.
        if not dr.extractions:
            logger.error('No extractions were returned by the DrumReader.')
            return None
        xml = dr.extractions[0]

    if save_xml_name:
        client.save_xml(xml, save_xml_name, save_xml_pretty)
    return process_xml(xml)