def parse(self, text):
    """Extract named entities from URL-quoted text and return them as JSON.

    The text is URL-unquoted, decoded as UTF-8 and parsed with
    ``lf.parseText`` using ``self.english``.  If any of those steps raises,
    the undecoded string is decoded as latin-1 and parsed again.  The
    sentence, POS-tag, named-entity and noun-chunk taggers are then run
    over the parsed document via ``lf.run_functions``.

    :param text: URL-quoted input text
    :return: JSON string of the ``named_entities`` entry of the result, or
        the JSON string ``{}`` when parsing fails or no named entities are
        found
    """
    result = {}
    doc = None
    # Keep the undecoded string in its own name.  Re-binding ``text`` to the
    # decoded value meant the latin-1 fallback could end up calling
    # ``.decode`` on an already-decoded unicode object, which in Python 2
    # implicitly ASCII-encodes first and fails on non-ASCII characters.
    raw = text
    try:
        raw = urllib.unquote(text)
        doc = lf.parseText(raw.decode('utf-8'), self.english)
    except Exception as ee:
        logging.info(
            'error in utf-8 decode - (will attempt latin-1 decode)')
        logging.info(ee)
        try:
            doc = lf.parseText(raw.decode('latin-1'), self.english)
        except Exception as fallback_error:
            # The fallback is best-effort as well: report the failure and
            # let the "nothing returned" branch below produce the empty
            # result instead of letting the exception escape.
            logging.error("Error in NLP: %s" % fallback_error)
            doc = None

    if doc is None:
        logging.info(
            'nothing returned from linguistic features parseText.')
        return json.dumps({})

    try:
        functions = [
            lf.tagSentences,
            lf.tagPOSTags,
            lf.tagNamedEntities,
            lf.tagNounChunks,
        ]
        lf.run_functions(doc, result, *functions)
        if 'named_entities' in result:
            return json.dumps(result['named_entities'])
        # Raised inside the try on purpose so that it is logged and turned
        # into the empty result by the handler below.
        raise NLPException('No named entities found.')
    except Exception as e:
        logging.error("Error in NLP: %s" % e)
        return json.dumps({})
def parse(self, text):
    """Return the named entities found in URL-quoted ``text`` as JSON.

    Steps: URL-unquote the text, decode it as UTF-8, and parse it with
    ``lf.parseText`` and ``self.english``.  When any of that raises, the
    undecoded string is decoded as latin-1 and parsed instead.  The parsed
    document is then passed through the sentence, POS-tag, named-entity and
    noun-chunk taggers with ``lf.run_functions``.

    :param text: URL-quoted input text
    :return: JSON string of ``result['named_entities']``; the JSON string
        ``{}`` if the text could not be parsed, tagging failed, or no named
        entities were produced
    """
    result = {}
    doc = None
    # ``raw`` always holds the undecoded string.  Previously ``text`` was
    # overwritten with the UTF-8-decoded value, so the latin-1 fallback
    # could call ``.decode`` on a unicode object; Python 2 ASCII-encodes
    # such an object first and raises on non-ASCII characters.
    raw = text
    try:
        raw = urllib.unquote(text)
        doc = lf.parseText(raw.decode('utf-8'), self.english)
    except Exception as ee:
        logging.info('error in utf-8 decode - (will attempt latin-1 decode)')
        logging.info(ee)
        try:
            doc = lf.parseText(raw.decode('latin-1'), self.english)
        except Exception as fallback_error:
            # Do not let a failing fallback escape: log it and fall through
            # to the empty-result branch.
            logging.error("Error in NLP: %s" % fallback_error)
            doc = None

    if doc is None:
        logging.info('nothing returned from linguistic features parseText.')
        return json.dumps({})

    try:
        functions = [lf.tagSentences, lf.tagPOSTags,
                     lf.tagNamedEntities, lf.tagNounChunks]
        lf.run_functions(doc, result, *functions)
        if 'named_entities' in result:
            return json.dumps(result['named_entities'])
        # Deliberately raised inside the try: the handler below logs it and
        # returns the empty result.
        raise NLPException('No named entities found.')
    except Exception as e:
        logging.error("Error in NLP: %s" % e)
        return json.dumps({})
def run_nlp(input_text='', input_file='', input_directory='', output_directory='', output_json=True):
    """Extracts named entities (place names, etc...) from text through the
    use of natural language processing (NLP).

    The input is taken from the first available of: ``input_text``,
    ``input_file``, ``input_directory``, and finally standard input.

    :param input_text: input text to be processed -- optional
    :param input_file: input file to be processed -- optional
    :param input_directory: a directory of files to be processed
    :param output_directory: an output directory where the output results
        are written to a JSON file -- optional
    :param output_json: specify to output json result or not
    :return: when ``output_directory`` is not given, the named entities of
        the first processed source (a JSON string if ``output_json`` is
        True, otherwise the raw value); when ``output_directory`` is given,
        every source is passed to ``handle_result`` and None is returned
    :raises NLPException: if there is no text to process or a source yields
        no named entities
    """
    logging.info("recieved input text: %s " % input_text)

    # Load the English model.
    nlp = English()

    source = None
    if input_text:
        source = [('stdin', input_text)]
    # Only open the file when a path was actually supplied; opening the
    # empty default path raised and made the directory and stdin fallbacks
    # unreachable.
    if not source and input_file:
        with codecs.open(input_file, 'r', 'utf-8') as f:
            source = [(input_file, f.read())]
    if not source and input_directory:
        source = read_files(input_directory)
    if not source:
        # Must be a list of (name, text) pairs like the other sources; a
        # flat ['stdin', text] list cannot be unpacked by the loop below.
        stdin_text = sys.stdin.read()
        if stdin_text:
            source = [('stdin', stdin_text)]

    if not source:
        raise NLPException('No text to process.')

    for name, text in source:
        result = {}
        try:
            doc = lf.parseText(text, nlp)
        except (UnicodeError, TypeError):
            try:
                doc = lf.parseText(text.decode('utf-8'), nlp)
            except UnicodeDecodeError:
                doc = lf.parseText(text.decode('latin-1'), nlp)

        # The list of functions to run.
        functions = [lf.tagSentences, lf.tagPOSTags,
                     lf.tagNamedEntities, lf.tagNounChunks]
        lf.run_functions(doc, result, *functions)

        if 'named_entities' not in result:
            raise NLPException('No named entities found.')

        if output_directory:
            # Write results to a file and carry on with the next source.
            handle_result(result['named_entities'], name, output_directory)
        elif output_json is True:
            return json.dumps(result['named_entities'])
        else:
            return result['named_entities']
def parse(self, text):
    """URL-unquote ``text``, run the NLP taggers and return entities as JSON.

    The unquoted text is handed to ``lf.parseText`` with ``self.english``.
    On a ``UnicodeError`` or ``TypeError`` it is retried decoded as UTF-8,
    and on a ``UnicodeDecodeError`` from that, decoded as latin-1.

    :param text: URL-quoted input text
    :return: JSON string of the ``named_entities`` entry, or None when
        tagging fails or no named entities are found (the error is logged)
    """
    unquoted = urllib.unquote(text)
    tagged = {}

    try:
        parsed = lf.parseText(unquoted, self.english)
    except (UnicodeError, TypeError):
        try:
            as_utf8 = unquoted.decode('utf-8')
            parsed = lf.parseText(as_utf8, self.english)
        except UnicodeDecodeError:
            as_latin1 = unquoted.decode('latin-1')
            parsed = lf.parseText(as_latin1, self.english)

    try:
        taggers = (
            lf.tagSentences,
            lf.tagPOSTags,
            lf.tagNamedEntities,
            lf.tagNounChunks,
        )
        lf.run_functions(parsed, tagged, *taggers)
        if 'named_entities' not in tagged:
            # Caught by the handler below, which only logs it.
            raise NLPException('No named entities found.')
        return json.dumps(tagged['named_entities'])
    except Exception as err:
        logging.error("Error in NLP: %s" % err)
    return None
def parse(self, text):
    """Return the named entities of URL-quoted ``text`` as a JSON string.

    ``text`` is URL-unquoted and parsed with ``lf.parseText`` using
    ``self.english``; if that raises ``UnicodeError`` or ``TypeError`` the
    parse is retried on the UTF-8-decoded text, then on the latin-1-decoded
    text if UTF-8 decoding fails.

    :param text: URL-quoted input text
    :return: JSON string of the ``named_entities`` entry; None if tagging
        raised or produced no named entities (the error is logged)
    """
    decoded_url = urllib.unquote(text)
    output = {}

    try:
        document = lf.parseText(decoded_url, self.english)
    except (UnicodeError, TypeError):
        try:
            document = lf.parseText(decoded_url.decode('utf-8'),
                                    self.english)
        except UnicodeDecodeError:
            document = lf.parseText(decoded_url.decode('latin-1'),
                                    self.english)

    try:
        pipeline = [lf.tagSentences, lf.tagPOSTags]
        pipeline += [lf.tagNamedEntities, lf.tagNounChunks]
        lf.run_functions(document, output, *pipeline)
        if 'named_entities' not in output:
            # Raised inside the try, so it is logged rather than propagated.
            raise NLPException('No named entities found.')
        return json.dumps(output['named_entities'])
    except Exception as problem:
        logging.error("Error in NLP: %s" % problem)
    return None
def run_nlp(input_text='', input_file='', input_directory='', output_directory='', output_json=True):
    """Extracts named entities (place names, etc...) from text through the
    use of natural language processing (NLP).

    Input is resolved in this order: ``input_text``, ``input_file``,
    ``input_directory``, then standard input.

    :param input_text: input text to be processed -- optional
    :param input_file: input file to be processed -- optional
    :param input_directory: a directory of files to be processed
    :param output_directory: an output directory where the output results
        are written to a JSON file -- optional
    :param output_json: specify to output json result or not
    :return: without ``output_directory``, the named entities of the first
        processed source (JSON string when ``output_json`` is True, else
        the raw value); with ``output_directory``, None after every source
        has been passed to ``handle_result``
    :raises NLPException: if there is no text to process or a source yields
        no named entities
    """
    logging.info("recieved input text: %s " % input_text)

    # Load the English model.
    nlp = English()

    source = None
    if input_text:
        source = [('stdin', input_text)]
    # Guard on input_file: the default '' used to be opened unconditionally,
    # which raised before the directory or stdin fallbacks could run.
    if not source and input_file:
        with codecs.open(input_file, 'r', 'utf-8') as f:
            source = [(input_file, f.read())]
    if not source and input_directory:
        source = read_files(input_directory)
    if not source:
        # The loop below unpacks (name, text) pairs, so stdin has to be
        # wrapped in a tuple rather than given as a flat two-item list.
        stdin_text = sys.stdin.read()
        if stdin_text:
            source = [('stdin', stdin_text)]

    if not source:
        raise NLPException('No text to process.')

    for name, text in source:
        result = {}
        try:
            doc = lf.parseText(text, nlp)
        except (UnicodeError, TypeError):
            try:
                doc = lf.parseText(text.decode('utf-8'), nlp)
            except UnicodeDecodeError:
                doc = lf.parseText(text.decode('latin-1'), nlp)

        # The list of functions to run.
        functions = [
            lf.tagSentences,
            lf.tagPOSTags,
            lf.tagNamedEntities,
            lf.tagNounChunks,
        ]
        lf.run_functions(doc, result, *functions)

        if 'named_entities' not in result:
            raise NLPException('No named entities found.')

        if output_directory:
            # Write results to a file and continue with the next source.
            handle_result(result['named_entities'], name, output_directory)
        elif output_json is True:
            return json.dumps(result['named_entities'])
        else:
            return result['named_entities']