def ner_process(self, process_type, sentence):
    """
    Instantiate the respective NER provider object and run recognition.

    Implements the Factory Design Pattern.

    :param process_type: one of <spacy|nltk|aws|gcloud|allen|stanford>
    :param sentence: sentence to run NER over
    :return: entity dict from the provider, or None on failure
    """
    try:
        # Dispatch table replaces the if/elif chain. 'stanford' deliberately
        # maps to the NLTK implementation, as in the original code.
        providers = {
            'gcloud': GCloudNLP,
            'spacy': SpacyNLP,
            'allen': AllenNLP,
            'nltk': NLTKNLP,
            'stanford': NLTKNLP,
            'aws': AWSNLP,
        }
        if process_type not in providers:
            # BUG FIX: an unknown process_type previously left `ner` unbound
            # and crashed with a NameError (swallowed by the broad except).
            raise ValueError(f"Unknown NER process_type: {process_type!r}")
        ner = providers[process_type]()
        # BUG FIX: original used `process_type in ('aws')`, which is a
        # *string* membership test ('a' in 'aws' is True), not a tuple test.
        if process_type == 'aws':
            ner.client_info = self.read_client_info(service_type='ner',
                                                    client=process_type)
        return ner.recognise_entities(sentence)
    except Exception as error:
        logger.error(error)
def get_service(self, service_name, service_type=None, service_region_name=None,
                aws_access_key_id=None, aws_secret_access_key=None):
    """
    Return a boto3 client or resource for the given AWS service.

    :param service_name: AWS service name (e.g. 'comprehend')
    :param service_type: 'client' for boto3.client, anything else -> resource
    :param service_region_name: AWS region name
    :param aws_access_key_id: optional explicit access key
    :param aws_secret_access_key: optional explicit secret key
    :return: a boto3 client or resource object
    """
    # BUG FIX: `credentials` was only bound when both keys were supplied,
    # causing a NameError in the boto3 calls below otherwise. Defaulting to
    # {} lets boto3 fall back to its normal credential resolution chain.
    # (The original `except KeyError` around a dict literal was dead code.)
    credentials = {}
    # @TODO: To be removed
    if aws_access_key_id and aws_secret_access_key:
        credentials = {
            'aws_access_key_id': aws_access_key_id,
            'aws_secret_access_key': aws_secret_access_key,
        }
    if service_type == 'client':
        return boto3.client(service_name, service_region_name, **credentials)
    return boto3.resource(service_name, service_region_name, **credentials)
def recognise_entities(self, text):
    """
    Recognise named entities in *text* using the spaCy pipeline on self.nlp.

    :param text: sentence to analyse
    :return: dict with 'combined_entities' (label -> [entity texts]) and
             'metadata' (noun phrases and verb lemmas), or None on failure
    """
    try:
        combined_entities = defaultdict(list)
        doc = self.nlp(text)
        # Analyze syntax
        noun_phrases = [chunk.text for chunk in doc.noun_chunks]
        verbs = [token.lemma_ for token in doc if token.pos_ == "VERB"]
        # IDIOM FIX: the original used a list comprehension purely for its
        # side effects; a plain loop states the intent.
        for entity in doc.ents:
            combined_entities[entity.label_].append(entity.text)
        return {'combined_entities': dict(combined_entities),
                'metadata': {'noun_phrases': noun_phrases, 'verbs': verbs}}
    except Exception as error:
        logger.error(error)
def read_client_info(**kwargs):
    """
    Load client configuration from a JSON file and optionally drill into it.

    NOTE(review): defined without `self` yet invoked as
    `self.read_client_info(...)` elsewhere — presumably a @staticmethod in
    the full file; confirm against the enclosing class.

    Keyword Args:
        service_type (str): optional top-level key to select (e.g. 'ner')
        client (str): optional nested key under service_type (e.g. 'aws')
        config_path (str): path to the JSON config file
                           (defaults to "../conf/client.json")

    :return: the selected config sub-dict, or None on failure
    """
    try:
        # Generalized: the previously hard-coded relative path can now be
        # overridden; the default preserves the original behavior.
        path = kwargs.get('config_path', "../conf/client.json")
        with open(path) as fp:
            client_info = json.load(fp)
        if kwargs.get('service_type'):
            client_info = client_info[kwargs.get('service_type')]
        if kwargs.get('client'):
            client_info = client_info[kwargs.get('client')]
        return client_info
    except Exception as error:
        logger.error(error)
def __init__(self, **kwargs):
    """
    Run NER over the given sentence and print the recognised entities.

    Keyword Args:
        process_type (str): provider name (<spacy|nltk|aws|gcloud|allen|stanford>),
                            defaults to 'spacy'
        sentence (str): mandatory sentence to analyse
    """
    # BUG FIX: the original only rejected an *empty* kwargs dict, so a call
    # with kwargs but no 'sentence' slipped through validation; the sentence
    # itself is now checked.
    self.sentence = kwargs.get('sentence')
    if not self.sentence:
        logger.error("'sentence' param is mandatory!!!")
        sys.exit(1)
    self.process_type = kwargs.get('process_type') or 'spacy'
    logger.info(
        f"Recognising Entities using '{self.process_type}' . . . .")
    self.entities = self.ner_process(self.process_type, self.sentence)
    # BUG FIX: ner_process returns None when the provider fails; guard
    # before calling .get() to avoid an AttributeError.
    if self.entities:
        print("\nENTITIES: {}".format(self.entities.get('combined_entities')))
    else:
        print("\nENTITIES: None")
def recognise_entities(self, text):
    """
    Recognise entities with AWS Comprehend.

    :param text: sentence to analyse
    :return: dict with 'combined_entities' (Type -> [Texts]) and 'metadata'
             (the raw Comprehend entity list), or None on failure
    """
    try:
        combined_entities = defaultdict(list)
        comprehend = super().get_service(
            service_name="comprehend",
            service_type="client",
            service_region_name=self.client_info['region'],
            aws_access_key_id=self.client_info['aws_access_key_id'],
            aws_secret_access_key=self.client_info['aws_secret_access_key'])
        # PERF/COST FIX: the original issued the detect_entities API call
        # twice — once for grouping and once for metadata. One call now
        # serves both.
        entity_list = comprehend.detect_entities(
            Text=text, LanguageCode='en')['Entities']
        for item in entity_list:
            combined_entities[item['Type']].append(item['Text'])
        return {'combined_entities': dict(combined_entities),
                'metadata': entity_list}
    except Exception as error:
        logger.error(error)
def sentiment(self, text):
    """
    Score the sentiment of *text* with the Google Cloud NL client.

    :param text: plain-text content to analyse
    :return: dict with 'score' and 'magnitude', or None on failure
    """
    try:
        doc = types.Document(content=text,
                             type=enums.Document.Type.PLAIN_TEXT)
        # Detects the sentiment of the text.
        analysed = self.client.analyze_sentiment(
            document=doc).document_sentiment
        return {
            'score': analysed.score,
            'magnitude': analysed.magnitude,
        }
    except Exception as error:
        logger.error(error)
def main():
    """
    CLI entry point: ``python heroka_ner.py <sentence> [<provider>]``.

    The provider defaults to 'spacy' when omitted.

    :return: the constructed NER object, or None on failure
    """
    try:
        if len(sys.argv) == 3:
            sentence = sys.argv[1]
            process_type = sys.argv[2]
        elif len(sys.argv) == 2:
            sentence = sys.argv[1]
            process_type = "spacy"
        else:
            # CONSISTENCY FIX: the usage message now lists 'gcloud', which
            # the provider factory supports but the message omitted.
            logger.info(
                "Usage: python heroka_ner.py <sentence> "
                "<spacy|aws|nltk|allen|stanford|gcloud>"
            )
            sys.exit(1)
        return NER(process_type=process_type, sentence=sentence)
    except Exception as error:
        logger.error(error)
def recognise_entities(self, listofnames, entities=None):
    """
    Recognise entities through the AllenNLP demo API.

    :param listofnames: list of sentence tokens/names to analyse
    :param entities: optional iterable of entity labels (e.g. 'ORG', 'PER')
                     used to filter the returned (tag, word) pairs
    :return: dict mapping each input name to a set of (tag, word) pairs
    """
    url = "https://demo.allennlp.org/api/named-entity-recognition/predict"
    names_entity = {}
    for name in listofnames:
        try:
            payload = {"sentence": name}
            # One retry after a short pause on transport errors.
            # BUG FIX: the original bare `except:` also swallowed
            # KeyboardInterrupt/SystemExit; only request failures retry now.
            try:
                post_response = requests.post(url=url, json=payload)
            except requests.RequestException:
                time.sleep(2)
                post_response = requests.post(url=url, json=payload)
            if post_response.status_code != 200:
                logger.error("Unable to hit ALLEN NLP API!!!!")
                sys.exit()
            results = post_response.json()
            if results:
                # Keep only tagged tokens ('O' marks "outside any entity").
                names_entity[name] = {
                    pair for pair in zip(results['tags'], results['words'])
                    if pair[0] != 'O'
                }
        except Exception as error:
            logger.warning(error)
            continue
    if entities:
        # BUG FIX: the original indexed names_entity by entity *label*
        # (e.g. 'ORG'), but its keys are the input names, so the filter
        # always produced empty results. Filter each name's (tag, word)
        # pairs by the requested labels instead (BIO tags like 'U-ORG'
        # contain the label as a substring).
        filtered = {}
        for name, pairs in names_entity.items():
            kept = {pair for pair in pairs
                    if any(label in pair[0] for label in entities)}
            if kept:
                filtered[name] = kept
        return filtered
    return dict(names_entity)
def recognise_entities(self, text):
    """
    Recognise entities using NLTK's ne_chunk pipeline.

    :param text: Sentence
    :return: dict with 'combined_entities' (label -> [entity strings]) and
             'metadata' (the raw chunk tree, or None if chunking failed)
    """
    entities_res = defaultdict(list)
    # BUG FIX: `chunk` was unbound when tokenization/tagging raised,
    # producing a NameError at the return statement below.
    chunk = None
    try:
        words = nltk.word_tokenize(text)
        pos_tags = nltk.pos_tag(words)
        chunk = nltk.ne_chunk(pos_tags)
        for ele in chunk.subtrees():
            try:
                if ele.label() != 'S':
                    # BUG FIX: the original kept only the first token of each
                    # entity, truncating multi-word names ("New York" -> "New").
                    entities_res[ele.label()].append(
                        " ".join(token for token, _tag in ele.leaves()))
            except Exception:
                continue
    except Exception as error:
        logger.error(error)
    return {"combined_entities": dict(entities_res), "metadata": chunk}
def recognise_entities(self, text):
    """
    Recognise entities via the Google Cloud Natural Language API.

    :param text: sentence to analyse
    :return: dict with 'combined_entities' (entity type name -> [entity
             names]) and 'metadata' (the raw API response), or None on failure
    """
    try:
        grouped = defaultdict(list)
        encoding_type = 'UTF32'
        document = language.types.Document(
            content=text,
            type=language.enums.Document.Type.PLAIN_TEXT)
        response = self.client.analyze_entities(
            document=document, encoding_type=encoding_type)
        # Group entity names under their human-readable type label.
        for entity in response.entities:
            label = enums.Entity.Type(entity.type).name
            grouped[label].append(entity.name)
        self.entities = dict(grouped)
        return {
            'combined_entities': dict(grouped),
            'metadata': response
        }
    except Exception as error:
        logger.error(error)
# # Usage: # # ====================================================================================================================== from heroka_nlp import logger import sys try: import boto3 except ImportError as ie: logger.error(ie) sys.exit(1) class AWS: def __init__(self, **kwargs): if kwargs: self.service = kwargs.get('service') self.service_type = kwargs.get('service_type', 'resource') def get_service(self, service_name, service_type=None, service_region_name=None,