def main():
    """Interactively parse lines typed by the user and print each tree.

    Reads lines from standard input until end-of-file, parses each one with
    a ``StanfordServerParser`` and pretty-prints the resulting tree.  On
    end-of-file it prints ``Bye!`` and exits with status 0.

    Runs on both Python 2 and Python 3.
    """
    try:
        # Python 2: the builtin input() would evaluate the typed text.
        read_line = raw_input
    except NameError:
        # Python 3: raw_input() was renamed to input().
        read_line = input

    parser = StanfordServerParser()
    while True:
        try:
            line = read_line("Enter line: ")
            tree = parser.parse(line)
            tree.pretty_print()
        except EOFError:
            # Parenthesised form prints the same text on Python 2 and 3.
            print("Bye!")
            sys.exit(0)
def main():
    """Run a read-parse-print loop until standard input is exhausted.

    Each entered line is sent to a ``StanfordServerParser`` and the
    resulting tree is pretty-printed.  End-of-file prints ``Bye!`` and
    terminates the process with exit status 0.
    """
    server_parser = StanfordServerParser()
    while True:
        try:
            text = input("Enter line: ")
            parsed = server_parser.parse(text)
            parsed.pretty_print()
        except EOFError:
            print("Bye!")
            sys.exit(0)
class StanfordParserTest(object):
    """Small wrapper that parses sentences with a ``StanfordServerParser``."""

    def __init__(self, host='localhost', port=9000, properties=None):
        """Create the underlying parser client.

        Args:
            host (str): host passed to ``StanfordServerParser``
            port (int): port passed to ``StanfordServerParser``
            properties (dict): properties passed to ``StanfordServerParser``;
                ``None`` (the default) means an empty dict
        """
        # A fresh dict per instance avoids sharing one mutable default
        # object between every instance created without `properties`.
        if properties is None:
            properties = {}
        self.parser = StanfordServerParser(host, port, properties)

    def query(self, sentence):
        """Parse `sentence` and return whatever the parser produces."""
        return self.parser.parse(sentence)
class QueryHandler(object):
    """Process a natural-language query and return an answer.

    A sentence is parsed into a tree, the tree is matched against two rule
    sets (single-entity queries, then entity-property queries) and the first
    match is answered through a SPARQL client.
    """

    def __init__(self, data_source='online', host='localhost', port=9000,
                 properties=None):
        """Initialize the query handler.

        Args:
            data_source (str): 'online' uses ``SparqlOnline``; any other
                value (normally 'local') uses ``SparqlLocal``.
            host (str): host of the Stanford CoreNLP service
            port (int): port of the Stanford CoreNLP service
            properties (dict): properties for the Stanford CoreNLP service;
                ``None`` (the default) means an empty dict
        """
        # Fresh dict per instance instead of a shared mutable default.
        if properties is None:
            properties = {}

        # Stanford CoreNLP parser client.
        self.parser = StanfordServerParser(host, port, properties)

        # Rules for queries about a single entity.
        self.entity_rules = OrderedDict([
            # Named entity, e.g. 周杰伦 (Jay Chou), 微软 (Microsoft)
            ('( FRAG ( NR:subject-r ) )', {}),
            # Common noun, e.g. 水果 (fruit)
            ('( NP ( NN:subject-r ) )', {}),
            # e.g. 谁是周杰伦 (who is Jay Chou), 什么是桃子 (what is a peach)
            ('( IP ( NP ( PN ) ) ( VP ( VC ) ( NP ( NN/NR:subject-r ) ) ) )', {}),
            # e.g. 周杰伦是谁 (Jay Chou is who), 桃子是什么 (a peach is what)
            ('( IP ( NP ( NN/NR:subject-r ) ) ( VP ( VC ) ( NP ( PN ) ) ) )', {}),
        ])

        # Rules for queries about a property of an entity.
        self.entity_property_rules = OrderedDict([
            # e.g. 姚明身高 (Yao Ming height)
            ('( NP ( NP ( NP/NR:subject-o ) ) ( NP ( NN:property-r ) ) )', {}),
            # e.g. 姚明的身高 (Yao Ming's height)
            ('( NP ( DNP ( NP ( NN/NR:subject-r ) ) ( DEG ) ) ( NP ( NN:property-r ) ) )', {}),
            # e.g. 珠穆朗玛峰的海拔是多少 (what is the elevation of Mount Everest)
            ('( IP ( NP ( DNP ( NP ( NR:subject-r ) ) ( DEG ) ) ( NP ( NN:property-r ) ) ) ( VP ) )', {}),
            # e.g. 珠穆朗玛峰海拔是多少 (same question without the particle 的)
            ('( IP ( NP ( NP/NR:s_type ) ( NN/NP:p_type ) ) ( VP ( VC ) ( QP/NP:q_type ) ) )', {
                's_type': OrderedDict([
                    ('( NR:subject-r )', {}),
                    ('( NP ( NR:subject-r ) )', {}),
                ]),
                'p_type': OrderedDict([
                    ('( NN:property-r )', {}),
                    ('( NP ( NN:property-r ) )', {}),
                ]),
                'q_type': OrderedDict([
                    ('( QP ( CD ) )', {}),
                    ('( NP ( PN ) )', {}),
                ]),
            }),
        ])

        # Knowledge-graph query client.  The query methods always go through
        # `self.sparql`, so the local client must be stored there too;
        # `local_sparql` is kept as an alias for existing callers.
        self.data_source = data_source
        if self.data_source == 'online':
            self.sparql = SparqlOnline()
        else:
            self.sparql = SparqlLocal()
            self.local_sparql = self.sparql

        # Logger.  Attach the stream handler only once per logger, otherwise
        # every new QueryHandler would duplicate each log line.
        self.logger = logging.getLogger(self.__class__.__module__)
        self.logger.setLevel(logging.DEBUG)
        if not self.logger.handlers:
            ch = logging.StreamHandler()
            ch.setLevel(logging.DEBUG)
            self.logger.addHandler(ch)

        # Property synonyms handler.
        self.property_synonyms = PropertySynonyms()

    def _entity_query(self, entity_name):
        """Handle an entity query.

        Args:
            entity_name (str): entity name

        Returns:
            The abstract returned by the SPARQL client.  For the online
            source 'baidubaike' is tried first and 'zhwiki' is the fallback.
        """
        if self.data_source != 'online':
            return self.sparql.get_abstract(entity_name)

        baidu_result = self.sparql.get_abstract(entity_name,
                                                baike='baidubaike')
        if baidu_result:
            return baidu_result
        return self.sparql.get_abstract(entity_name, baike='zhwiki')

    def _eneity_property_query(self, entity_name, property_name):
        """Handle an entity property query.

        Args:
            entity_name (str): entity name
            property_name (str): property name as matched in the sentence

        Returns:
            The property value returned by the SPARQL client.  For the
            online source 'baidubaike' is tried first and 'zhwiki' is the
            fallback.
        """
        if self.data_source != 'online':
            return self.sparql.get_property(entity_name, property_name)

        # NOTE(review): the synonym lookup result is only logged; the query
        # below still uses the original property name.  Confirm whether the
        # corrected property was meant to be used for the lookup.
        corrected_property = self.property_synonyms.get_synonyms(
            property_name)
        self.logger.debug('Corrected property:\n%s', corrected_property)

        baidu_result = self.sparql.get_property(entity_name, property_name,
                                                baike='baidubaike')
        if baidu_result:
            return baidu_result
        return self.sparql.get_property(entity_name, property_name,
                                        baike='zhwiki')

    def query(self, sentence):
        """Answer a query.

        Args:
            sentence (str): query sentence

        Returns:
            The answer from the matching handler, or the string
            'rule not match' when no rule matches the parse tree.
        """
        tree = self.parser.parse(sentence)
        self.logger.debug('Dependence parse tree: \n%s', tree)

        # Single-entity rules take precedence over property rules.
        info = match_rules(tree, self.entity_rules)
        if info:
            self.logger.debug('Entity match:\n%s', info)
            return self._entity_query(info['subject'])

        info = match_rules(tree, self.entity_property_rules)
        if info:
            self.logger.debug('Entity property match:\n%s', info)
            return self._eneity_property_query(info['subject'],
                                               info['property'])

        return 'rule not match'
def __init__(self, data_source='online', host='localhost', port=9000,
             properties=None):
    """Initialize the query handler.

    Args:
        data_source (str): 'online' uses ``SparqlOnline``; any other value
            (normally 'local') uses ``SparqlLocal``.
        host (str): host of the Stanford CoreNLP service
        port (int): port of the Stanford CoreNLP service
        properties (dict): properties for the Stanford CoreNLP service;
            ``None`` (the default) means an empty dict
    """
    # Fresh dict per instance instead of a shared mutable default.
    if properties is None:
        properties = {}

    # Stanford CoreNLP parser client.
    self.parser = StanfordServerParser(host, port, properties)

    # Rules for queries about a single entity.
    self.entity_rules = OrderedDict([
        # Named entity, e.g. 周杰伦 (Jay Chou), 微软 (Microsoft)
        ('( FRAG ( NR:subject-r ) )', {}),
        # Common noun, e.g. 水果 (fruit)
        ('( NP ( NN:subject-r ) )', {}),
        # e.g. 谁是周杰伦 (who is Jay Chou), 什么是桃子 (what is a peach)
        ('( IP ( NP ( PN ) ) ( VP ( VC ) ( NP ( NN/NR:subject-r ) ) ) )', {}),
        # e.g. 周杰伦是谁 (Jay Chou is who), 桃子是什么 (a peach is what)
        ('( IP ( NP ( NN/NR:subject-r ) ) ( VP ( VC ) ( NP ( PN ) ) ) )', {}),
    ])

    # Rules for queries about a property of an entity.
    self.entity_property_rules = OrderedDict([
        # e.g. 姚明身高 (Yao Ming height)
        ('( NP ( NP ( NP/NR:subject-o ) ) ( NP ( NN:property-r ) ) )', {}),
        # e.g. 姚明的身高 (Yao Ming's height)
        ('( NP ( DNP ( NP ( NN/NR:subject-r ) ) ( DEG ) ) ( NP ( NN:property-r ) ) )', {}),
        # e.g. 珠穆朗玛峰的海拔是多少 (what is the elevation of Mount Everest)
        ('( IP ( NP ( DNP ( NP ( NR:subject-r ) ) ( DEG ) ) ( NP ( NN:property-r ) ) ) ( VP ) )', {}),
        # e.g. 珠穆朗玛峰海拔是多少 (same question without the particle 的)
        ('( IP ( NP ( NP/NR:s_type ) ( NN/NP:p_type ) ) ( VP ( VC ) ( QP/NP:q_type ) ) )', {
            's_type': OrderedDict([
                ('( NR:subject-r )', {}),
                ('( NP ( NR:subject-r ) )', {}),
            ]),
            'p_type': OrderedDict([
                ('( NN:property-r )', {}),
                ('( NP ( NN:property-r ) )', {}),
            ]),
            'q_type': OrderedDict([
                ('( QP ( CD ) )', {}),
                ('( NP ( PN ) )', {}),
            ]),
        }),
    ])

    # Knowledge-graph query client.  The local client is stored under both
    # names so code reading either `self.sparql` or `self.local_sparql`
    # finds it.
    self.data_source = data_source
    if self.data_source == 'online':
        self.sparql = SparqlOnline()
    else:
        self.sparql = SparqlLocal()
        self.local_sparql = self.sparql

    # Logger.  Attach the stream handler only once per logger, otherwise
    # every new instance would duplicate each log line.
    self.logger = logging.getLogger(self.__class__.__module__)
    self.logger.setLevel(logging.DEBUG)
    if not self.logger.handlers:
        ch = logging.StreamHandler()
        ch.setLevel(logging.DEBUG)
        self.logger.addHandler(ch)

    # Property synonyms handler.
    self.property_synonyms = PropertySynonyms()
from collections import OrderedDict
import os

from lango.parser import StanfordServerParser
from lango.matcher import match_rules

# Demo script: match question parse trees against nested rules and print
# the captured fields for every match.

parser = StanfordServerParser()

sents = ['What religion is the President of the United States?']

# Top-level templates map to rule sets for the sub-trees they name
# (`np_t`, `wh_t`).
rules = {
    '( SBARQ ( WHNP/WHADVP:wh_t ) ( SQ ( VBZ ) ( NP:np_t ) ) )': {
        'np_t': {
            '( NP ( NP:subj-o ) ( PP ( IN:subj_in-o ) ( NP:obj-o ) ) )': {},
            '( NP:subj-o )': {},
        },
        'wh_t': {
            '( WHNP:whnp ( WDT ) ( NN:prop-o ) )': {},
            '( WHNP/WHADVP:qtype-o )': {},
        },
    },
    '( SBARQ:subj-o )': {},
}

# Context keys to report for each match; missing keys print as None.
keys = ['subj', 'subj_in', 'obj', 'prop', 'qtype']

for sent in sents:
    tree = parser.parse(sent)
    contexts = match_rules(tree, rules, multi=True)
    for context in contexts:
        print(", ".join(['%s:%s' % (k, context.get(k)) for k in keys]))
def __init__(self, host='localhost', port=9000, properties=None):
    """Create the parser client, the matching rules and the data client.

    Args:
        host (str): host passed to ``StanfordServerParser``
        port (int): port passed to ``StanfordServerParser``
        properties (dict): properties passed to ``StanfordServerParser``;
            ``None`` (the default) means an empty dict
    """
    LoggingInterface.__init__(self)

    # Fresh dict per instance instead of a shared mutable default.
    if properties is None:
        properties = {}
    self.parser = StanfordServerParser(host, port, properties)

    # Rules for matching a subject and/or property of an NP.
    #
    # Matches:
    #   - subject         : Subject to get property of
    #   - prop (optional) : Property to get of subject
    #
    # Examples:
    #   - Obama born
    #   - Obama
    #   - Obama's birthday
    #   - Barack Obama's wife
    self.subj_rules = OrderedDict([
        # When was (Obama born)
        ('( NP ( NP:subject-o ) ( VP:prop-o ) )', {}),
        # What is (the birth day of Obama)
        ('( NP ( NP:prop-o ) ( PP ( IN ) ( NP:subject-o ) ) )', {}),
        # What is (Obama's birthday)
        ('( NP ( NP:subject-o ( NNP ) ( POS ) ) ( NN/NNS:prop-o ) $ )', {}),
        # What is (Obama's birth day)
        ('( NP ( NP:subject-o ( NNP ) ( POS ) ) ( NN/JJ:prop-o ) ( NN/NNS:prop2-o ) )', {}),
        # What is (Barack Obama's birthday)
        ('( NP ( NP:subject-o ( NNP ) ( NNP ) ( POS ) ) ( NN/NNS:prop-o ) $ )', {}),
        # What is (Barack Obama's birth day)
        ('( NP ( NP:subject-o ( NNP ) ( NNP ) ( POS ) ) ( NN/JJ:prop-o ) ( NN/NNS:prop2-o ) )', {}),
        # What was (the Soviet Union's motto)
        ('( NP ( NP:subject-o ( DT ) ( NNP ) ( NNP ) ( POS ) ) ( NN:prop-o ) )', {}),
        # Fallback: the whole NP is the subject
        ('( NP:subject-o )', {}),
    ])

    # Rules for matching a subject-property query.
    #
    # Matches:
    #   - qtype            : Question type (who, where, what, when)
    #   - subject          : Subject to get property of
    #   - prop (optional)  : Property to get of subject
    #   - prop2 (optional) : Second part of property
    #   - prop3 (optional) : Overwrite property
    #   - jj (optional)    : Adjective that will be property
    #                        (e.g. many/tall/high)
    #
    # Examples:
    #   - What religion is Obama?
    #   - Who did Obama marry?
    #   - Who is Obama?
    #   - Who is Barack Obama's wife?
    #   - How tall is Mt. Everest?
    self.subject_prop_rules = {
        '( SBARQ ( WHNP/WHADVP/WHADJP:qtype_t ) ( SQ:sq_t ) )': {
            'qtype_t': OrderedDict([
                # What religion
                ('( WHNP ( WDT:qtype-o=what ) ( NN:prop3-o ) )', {}),
                # What genre
                ('( WHNP ( WDT:qtype-o=what ) ( NP ( NN:prop3-o ) ) )', {}),
                # How many/tall
                ('( WHADJP ( WRB:qtype-o ) ( JJ:jj-o ) )', {}),
                # which country
                ('( WHNP ( WDT:qtype-o=which ) ( NN ) )', {}),
                # What/where/who
                ('( WHNP/WHADVP:qtype-o )', {}),
            ]),
            'sq_t': {
                # What ethnicity is Obama.  The captures must be named
                # `subject` and `action` like in the sibling rules below;
                # a misspelt or missing capture never reaches the handler.
                '( SQ ( VP ( ADVP:prop-o ) ) ( VBZ:action-o ) ( VP:subject-o ) )': {},
                # Who did Obama marry
                '( SQ ( VBD:action-o ) ( NP:subj_t ) ( VP:prop-o ) )': {
                    'subj_t': self.subj_rules,
                },
                # Who did
                '( SQ ( VP ( VBZ/VBD/VBP:action-o ) ( NP:subj_t ) ) )': {
                    'subj_t': self.subj_rules,
                },
                # Who is Edward Thatch known as
                '( SQ ( VBZ:action-o ) ( NP:subj_t ) ( VP:prop-o ) )': {
                    'subj_t': self.subj_rules,
                },
                # Which country is Deshauna Barber a citizen of?
                '( SQ ( VBZ:action-o ) ( NP:subject-o ( NNP ) ( NNP ) ) ( NP ( NP ( DT ) ( NN:prop-o ) ) ( PP ( IN ) ) ) )': {},
                # What is Obama
                '( SQ ( VBZ/VBD/VBP:action-o ) ( NP:subj_t ) )': {
                    'subj_t': self.subj_rules,
                },
            },
        },
    }

    # Rules for getting a property of an NP or VP.
    #
    # Matches:
    #   prop  : Property of instance to match
    #   op    : Operation to match property
    #   value : Value of property
    #
    # Examples:
    #   - born in 1950
    #   - have population over 100,000
    self.prop_rules = OrderedDict([
        # verb + past participle + PP, e.g. (are) born in 1950
        ('( SQ/VP ( VB/VBP/VBD ) ( VP ( VBN:prop-o ) ( PP ( IN:op-o ) ( NP:value-o ) ) ) )', {}),
        # are in Asia
        ('( SQ/VP ( VB/VBP/VBD=are ) ( PP ( IN:op-o ) ( NP:value-o ) ) )', {}),
        # died from laryngitis
        ('( SQ/VP ( VB/VBP/VBD:prop-o ) ( PP ( IN:op-o ) ( NP:value-o ) ) )', {}),
        # have population over 1000000
        ('( SQ/VP ( VB/VBP/VBD ) ( NP ( NP:prop-o ) ( PP ( IN:op-o ) ( NP/CD/JJ:value-o ) ) ) )', {}),
        ('( SQ/VP ( VB/VBP/VBD ) ( NP:prop-o ) ( NP ( QP ( JJR:op-o ) ( IN ) ( CD:value-o ) ) ) )', {}),
        ('( SQ/VP ( VB/VBP/VBD ) ( NP ( QP ( JJR:op-o ) ( IN=than ) ( NP/CD/JJ:value-o ) ) ( NNS:value_units-o ) ) )', {}),
        ('( PP ( IN:op-o ) ( NP ( NP:value-o ) ( PP:pp_t ) ) )', {}),
        ('( PP ( IN:op-o ) ( NP:value-o ) )', {}),
    ])

    # Rules for finding entity queries.
    #
    # Matches:
    #   qtype                    : question type (how many, which)
    #   inst                     : instance of entity to match
    #   prop_match_t (optional)  : Parse tree for first property match
    #   prop_match2_t (optional) : Parse tree for second property match
    #
    # Examples:
    #   - How many POTUS are there?
    #   - Which POTUS are born in 1950?
    #   - How many books are written by George Orwell?
    #   - How many countries are in Asia and have population over 100,000?
    self.find_entity_rules = OrderedDict([
        ('( SBARQ ( WHNP ( WHNP ( WHADJP:qtype-o ) ( NNS:inst-O ) ) ( PP:prop_match_t ) ) )', {}),
        ('( SBARQ ( WHNP:qtype-o=who ) ( SQ:sq_t ) )', {
            'sq_t': {
                '( SQ ( VBD/VBZ ) ( NP ( NP:inst-O ) ( PP:prop_match_t ) ) )': {},
            },
        }),
        ('( SBARQ ( WHNP ( WHADJP/WDT/WHNP:qtype-o ) ( NNS/NN/NP:inst-O ) ) ( SQ:sq_t ) )', {
            'sq_t': OrderedDict([
                # are there
                ('( SQ ( VBP ) ( NP ( EX=there ) ) )', {}),
                # are in Asia and have population over 100,000
                ('( SQ ( VP ( VP:prop_match_t ) ( CC ) ( VP:prop_match2_t ) ) )', {}),
                ('( SQ ( VP:prop_match_t ) )', {}),
                ('( SQ:prop_match_t )', {}),
            ]),
        }),
    ])

    self.wd = WikiData()
class NLQueryEngine(LoggingInterface):
    """Grammar mapping for knowledge queries of the form:

    - What is the X of Y
    - What is X's Y
    """

    def __init__(self, host='localhost', port=9000, properties=None):
        """Create the parser client, the matching rules and the data client.

        Args:
            host (str): host passed to ``StanfordServerParser``
            port (int): port passed to ``StanfordServerParser``
            properties (dict): properties passed to
                ``StanfordServerParser``; ``None`` (the default) means an
                empty dict
        """
        LoggingInterface.__init__(self)

        # Fresh dict per instance instead of a shared mutable default.
        if properties is None:
            properties = {}
        self.parser = StanfordServerParser(host, port, properties)

        # Rules for matching a subject and/or property of an NP.
        #
        # Matches:
        #   - subject         : Subject to get property of
        #   - prop (optional) : Property to get of subject
        #
        # Examples:
        #   - Obama born
        #   - Obama
        #   - Obama's birthday
        #   - Barack Obama's wife
        self.subj_rules = OrderedDict([
            # When was (Obama born)
            ('( NP ( NP:subject-o ) ( VP:prop-o ) )', {}),
            # What is (the birth day of Obama)
            ('( NP ( NP:prop-o ) ( PP ( IN ) ( NP:subject-o ) ) )', {}),
            # What is (Obama's birthday)
            ('( NP ( NP:subject-o ( NNP ) ( POS ) ) ( NN/NNS:prop-o ) $ )', {}),
            # What is (Obama's birth day)
            ('( NP ( NP:subject-o ( NNP ) ( POS ) ) ( NN/JJ:prop-o ) ( NN/NNS:prop2-o ) )', {}),
            # What is (Barack Obama's birthday)
            ('( NP ( NP:subject-o ( NNP ) ( NNP ) ( POS ) ) ( NN/NNS:prop-o ) $ )', {}),
            # What is (Barack Obama's birth day)
            ('( NP ( NP:subject-o ( NNP ) ( NNP ) ( POS ) ) ( NN/JJ:prop-o ) ( NN/NNS:prop2-o ) )', {}),
            # What was (the Soviet Union's motto)
            ('( NP ( NP:subject-o ( DT ) ( NNP ) ( NNP ) ( POS ) ) ( NN:prop-o ) )', {}),
            # Fallback: the whole NP is the subject
            ('( NP:subject-o )', {}),
        ])

        # Rules for matching a subject-property query.
        #
        # Matches:
        #   - qtype            : Question type (who, where, what, when)
        #   - subject          : Subject to get property of
        #   - prop (optional)  : Property to get of subject
        #   - prop2 (optional) : Second part of property
        #   - prop3 (optional) : Overwrite property
        #   - jj (optional)    : Adjective that will be property
        #                        (e.g. many/tall/high)
        #
        # Examples:
        #   - What religion is Obama?
        #   - Who did Obama marry?
        #   - Who is Obama?
        #   - Who is Barack Obama's wife?
        #   - How tall is Mt. Everest?
        self.subject_prop_rules = {
            '( SBARQ ( WHNP/WHADVP/WHADJP:qtype_t ) ( SQ:sq_t ) )': {
                'qtype_t': OrderedDict([
                    # What religion
                    ('( WHNP ( WDT:qtype-o=what ) ( NN:prop3-o ) )', {}),
                    # What genre
                    ('( WHNP ( WDT:qtype-o=what ) ( NP ( NN:prop3-o ) ) )', {}),
                    # How many/tall
                    ('( WHADJP ( WRB:qtype-o ) ( JJ:jj-o ) )', {}),
                    # which country
                    ('( WHNP ( WDT:qtype-o=which ) ( NN ) )', {}),
                    # What/where/who
                    ('( WHNP/WHADVP:qtype-o )', {}),
                ]),
                'sq_t': {
                    # What ethnicity is Obama.  subject_query() requires
                    # both `subject` and `action`, so this rule has to
                    # capture them under exactly those names.
                    '( SQ ( VP ( ADVP:prop-o ) ) ( VBZ:action-o ) ( VP:subject-o ) )': {},
                    # Who did Obama marry
                    '( SQ ( VBD:action-o ) ( NP:subj_t ) ( VP:prop-o ) )': {
                        'subj_t': self.subj_rules,
                    },
                    # Who did
                    '( SQ ( VP ( VBZ/VBD/VBP:action-o ) ( NP:subj_t ) ) )': {
                        'subj_t': self.subj_rules,
                    },
                    # Who is Edward Thatch known as
                    '( SQ ( VBZ:action-o ) ( NP:subj_t ) ( VP:prop-o ) )': {
                        'subj_t': self.subj_rules,
                    },
                    # Which country is Deshauna Barber a citizen of?
                    '( SQ ( VBZ:action-o ) ( NP:subject-o ( NNP ) ( NNP ) ) ( NP ( NP ( DT ) ( NN:prop-o ) ) ( PP ( IN ) ) ) )': {},
                    # What is Obama
                    '( SQ ( VBZ/VBD/VBP:action-o ) ( NP:subj_t ) )': {
                        'subj_t': self.subj_rules,
                    },
                },
            },
        }

        # Rules for getting a property of an NP or VP.
        #
        # Matches:
        #   prop  : Property of instance to match
        #   op    : Operation to match property
        #   value : Value of property
        #
        # Examples:
        #   - born in 1950
        #   - have population over 100,000
        self.prop_rules = OrderedDict([
            # verb + past participle + PP, e.g. (are) born in 1950
            ('( SQ/VP ( VB/VBP/VBD ) ( VP ( VBN:prop-o ) ( PP ( IN:op-o ) ( NP:value-o ) ) ) )', {}),
            # are in Asia
            ('( SQ/VP ( VB/VBP/VBD=are ) ( PP ( IN:op-o ) ( NP:value-o ) ) )', {}),
            # died from laryngitis
            ('( SQ/VP ( VB/VBP/VBD:prop-o ) ( PP ( IN:op-o ) ( NP:value-o ) ) )', {}),
            # have population over 1000000
            ('( SQ/VP ( VB/VBP/VBD ) ( NP ( NP:prop-o ) ( PP ( IN:op-o ) ( NP/CD/JJ:value-o ) ) ) )', {}),
            ('( SQ/VP ( VB/VBP/VBD ) ( NP:prop-o ) ( NP ( QP ( JJR:op-o ) ( IN ) ( CD:value-o ) ) ) )', {}),
            ('( SQ/VP ( VB/VBP/VBD ) ( NP ( QP ( JJR:op-o ) ( IN=than ) ( NP/CD/JJ:value-o ) ) ( NNS:value_units-o ) ) )', {}),
            ('( PP ( IN:op-o ) ( NP ( NP:value-o ) ( PP:pp_t ) ) )', {}),
            ('( PP ( IN:op-o ) ( NP:value-o ) )', {}),
        ])

        # Rules for finding entity queries.
        #
        # Matches:
        #   qtype                    : question type (how many, which)
        #   inst                     : instance of entity to match
        #   prop_match_t (optional)  : Parse tree for first property match
        #   prop_match2_t (optional) : Parse tree for second property match
        #
        # Examples:
        #   - How many POTUS are there?
        #   - Which POTUS are born in 1950?
        #   - How many books are written by George Orwell?
        #   - How many countries are in Asia and have population over
        #     100,000?
        self.find_entity_rules = OrderedDict([
            ('( SBARQ ( WHNP ( WHNP ( WHADJP:qtype-o ) ( NNS:inst-O ) ) ( PP:prop_match_t ) ) )', {}),
            ('( SBARQ ( WHNP:qtype-o=who ) ( SQ:sq_t ) )', {
                'sq_t': {
                    '( SQ ( VBD/VBZ ) ( NP ( NP:inst-O ) ( PP:prop_match_t ) ) )': {},
                },
            }),
            ('( SBARQ ( WHNP ( WHADJP/WDT/WHNP:qtype-o ) ( NNS/NN/NP:inst-O ) ) ( SQ:sq_t ) )', {
                'sq_t': OrderedDict([
                    # are there
                    ('( SQ ( VBP ) ( NP ( EX=there ) ) )', {}),
                    # are in Asia and have population over 100,000
                    ('( SQ ( VP ( VP:prop_match_t ) ( CC ) ( VP:prop_match2_t ) ) )', {}),
                    ('( SQ ( VP:prop_match_t ) )', {}),
                    ('( SQ:prop_match_t )', {}),
                ]),
            }),
        ])

        self.wd = WikiData()

    def subject_query(self, qtype, subject, action, jj=None, prop=None,
                      prop2=None, prop3=None):
        """Transform a matched context into query parameters and run it.

        Args:
            qtype: Matched type of query (what, who, where, etc.)
            subject: Matched subject (Obama)
            action: Matched verb action (is, was, ran)
            jj (optional): Matched adjective (old, tall, high, ...)
            prop (optional): Matched property
            prop2 (optional): Second part of the matched property
            prop3 (optional): Property used when no other one was matched

        Returns:
            Answer: Answer from query, or empty Answer if None
        """
        self.info('Subject query: {0},{1},{2},{3},{4},{5},{6}',
                  qtype, subject, action, jj, prop, prop2, prop3)

        if jj == 'old':
            # How old is Obama?
            prop = 'age'

        if jj in ['tall', 'high']:
            # How tall is Yao Ming / Eifel tower?
            prop = 'height'

        if prop2:
            prop = prop + ' ' + prop2

        if prop3 and not prop:
            prop = prop3

        # NOTE(review): the nesting of the normalisation steps below was
        # reconstructed; confirm against the intended behaviour.  Defaults
        # derived from `action` apply only when no property was matched,
        # while rewrites of a specific property value apply regardless of
        # where the property came from.
        if not prop:
            if action not in ['is', 'was']:
                prop = action
            elif qtype == 'where' and action == 'is':
                prop = 'located'

        # `prop` may still be None here (e.g. "Who is Obama?").
        if qtype == 'who' and prop and prop.endswith('ed'):
            prop += ' by'

        if qtype == 'where' and prop == 'founded':
            prop = 'location of formation'

        if qtype == 'how' and prop == 'died':
            prop = 'manner of death'

        if qtype == 'where' and prop == 'died':
            prop = 'place of death'

        if prop in ['birth day', 'birthday']:
            prop = 'date of birth'

        ans = self.get_property(qtype, subject, prop)
        if not ans:
            ans = Answer()

        ans.params = {
            'qtype': qtype,
            'subject': subject,
            'prop': prop,
        }

        # Keep only the last population entry; skip when there is no data
        # (e.g. the empty Answer created above).
        if prop == "population" and ans.data:
            ans.data = ans.data[-1]

        self.info('Query params: {0}', ans.params)
        return ans

    def get_prop_tuple(self, prop=None, value=None, op=None,
                       value_units=None, pp_t=None):
        """Return a list of property tuples (prop, value, op).

        E.g. [(population, 1000000, >)]

        Args:
            prop (str): Property to search for (e.g. population)
            value (str): Value property should equal (e.g. 10000000)
            op (str): Operator word for the property (e.g. over)
            value_units (str): Units of the value, used to infer `prop`
                when it is missing (e.g. people -> population)
            pp_t (Tree): Nested PP tree matched for further properties

        Returns:
            list: Property tuples, e.g: [(population, 10000000, >)], or
            None when the operator or the property cannot be determined.
        """
        self.info('Prop tuple: {0},{1},{2},{3},{4}', prop, value, op,
                  value_units, pp_t)

        if op in ['in', 'by', 'of', 'from']:
            oper = op
        elif op in ['over', 'above', 'more', 'greater']:
            oper = '>'
        elif op in ['under', 'below', 'less']:
            oper = '<'
        else:
            self.error('NO OP {0}', op)
            return None

        # Infer property to match value
        if prop is None:
            if value_units is not None:
                if value_units in ['people']:
                    prop = 'population'

        if not prop:
            return None

        props = [(prop, value, oper)]

        if pp_t:
            # The nested PP contributes further tuples through recursion.
            prop_tuple = match_rules(pp_t, self.prop_rules,
                                     self.get_prop_tuple)
            if not prop_tuple:
                return None
            props += prop_tuple

        return props

    def find_entity_query(self, qtype, inst, prop_match_t=None,
                          prop_match2_t=None):
        """Transform a matched context into an entity search and run it.

        Args:
            qtype (str): Matched type of query (what, who, where, etc.)
            inst (str): Matched instance of entity to match (Obama)
            prop_match_t (Tree): Matched property Tree
            prop_match2_t (Tree): Matched property Tree

        Returns:
            Answer: Answer from query, or empty Answer if None.  Returns
            None when a property tree is given but cannot be converted
            into property tuples.
        """
        props = []

        if prop_match_t:
            prop = match_rules(prop_match_t, self.prop_rules,
                               self.get_prop_tuple)
            if not prop:
                return
            props += prop

        if prop_match2_t:
            prop = match_rules(prop_match2_t, self.prop_rules,
                               self.get_prop_tuple)
            if not prop:
                return
            props += prop

        # All-uppercase instances (e.g. POTUS) are left untouched.
        if not inst.isupper():
            inst = singularize(inst)

        ans = self.wd.find_entity(qtype, inst, props)
        if not ans:
            ans = Answer()

        ans.params = {
            'qtype': qtype,
            'inst': inst,
            'props': props,
        }
        return ans

    def get_property(self, qtype, subject, prop):
        """Get a property of a subject.

        Example:
            get_property('who', 'Obama', 'wife') = 'Michelle Obama'

        Args:
            qtype: Question type
            subject: Subject to get property of
            prop: Property to get of subject

        Todo:
            * Add other APIs here

        Returns:
            Answer: Answer from query
        """
        return self.wd.get_property(qtype, subject, prop)

    def preprocess(self, sent):
        """Preprocess a query by appending '?' when it is missing."""
        # endswith() also copes with an empty string, where sent[-1]
        # would raise IndexError.
        if not sent.endswith('?'):
            sent = sent + '?'
        return sent

    def query(self, sent, format_='plain'):
        """Answer a query.

        If format is plain, will return the answer as a string
        If format is raw, will return the raw context of query

        Args:
            sent: Query sentence
            format_: Format of answer to return (Default to plain)

        Returns:
            dict: Answer context
            str: Answer as a string

        Raises:
            ValueError: If format_ is incorrect
        """
        sent = self.preprocess(sent)
        tree = self.parser.parse(sent)
        self.info(tree)

        # Entity search takes precedence; the subject/property rules are
        # only evaluated when it yields nothing, so no second lookup is
        # made for an answer that would be discarded.
        ans = match_rules(tree, self.find_entity_rules,
                          self.find_entity_query)
        if not ans:
            ans = match_rules(tree, self.subject_prop_rules,
                              self.subject_query)
        if not ans:
            ans = Answer()

        ans.query = sent
        ans.tree = str(tree)

        if format_ == 'raw':
            return ans.to_dict()
        elif format_ == 'plain':
            return ans.to_plain()
        else:
            raise ValueError('Undefined format: %s' % format_)
def __init__(self, host='localhost', port=9000, properties=None):
    """Create the ``StanfordServerParser`` client used by this object.

    Args:
        host (str): host passed to ``StanfordServerParser``
        port (int): port passed to ``StanfordServerParser``
        properties (dict): properties passed to ``StanfordServerParser``;
            ``None`` (the default) means an empty dict
    """
    # Fresh dict per instance instead of a shared mutable default.
    if properties is None:
        properties = {}
    self.parser = StanfordServerParser(host, port, properties)
class QueryParser:
    """Parse an English-language question.

    ``QueryParser`` uses part-of-speech tagging and pattern matching to turn
    a natural-language question into a structured query.

    Parameters
    ----------
    port : int, optional
        The port on which the Stanford CoreNLP Server listens.
        [default: 9501]
    """

    # SQLite file holding the fallback term -> entity map.
    ENTITY_DB = 'data/reasoner_data.sqlite'

    def __init__(self, port=9501):
        self.parser = StanfordServerParser(port=port)
        self.rules = self.get_rules()

    def get_rules(self):
        """Return the combined matching rules.

        Returns
        -------
        rules : dict
            The "outcome" rules followed by the "protects" rules, keyed by
            parse-tree template.
        """
        rules_outcome = {
            '( SBARQ ( WHNP ( WDT ) ( NP:np ) ) ( SQ ( VP ( VBZ:action-o ) ( PP:pp1 ) ( PP:pp2 ) ) ) )': {
                'np': {
                    '( NP:relation-o )': {}
                },
                'pp1': {
                    '( PP ( TO=to ) ( NP:to_object-o ) )': {},
                    '( PP ( IN=from ) ( NP:from_object-o ) )': {}
                },
                'pp2': {
                    '( PP ( TO=to ) ( NP:to_object-o ) )': {},
                    '( PP ( IN=from ) ( NP:from_object-o ) )': {}
                }
            },
            '( SBARQ ( WHNP ( WDT ) ( NP:np1 ) ) ( SQ ( VP ( VBZ:action-o ) ( NP:np2 ) ( PP:pp ) ) ) )': {
                'np1': {
                    '( NP:relation-o )': {}
                },
                'np2': {
                    '( NP:from_object-o )': {}
                },
                'pp': {
                    '( PP ( TO=to ) ( NP:to_object-o ) )': {}
                }
            },
            '( SBAR ( WHNP ( WDT ) ) ( S ( NP:np1 ) ( VP ( VBZ:action-o ) ( NP:np2 ) ( PP:pp ) ) ) )': {
                'np1': {
                    '( NP:relation-o )': {}
                },
                'np2': {
                    '( NP:from_object-o )': {}
                },
                'pp': {
                    '( PP ( TO=to ) ( NP ( NN:to_object-o ) ( NN ) ) )': {}
                }
            },
            # NOTE(review): this template used to appear twice in the dict
            # literal, once with a "between NN and NN" PP rule and once
            # with the compound-object rule below.  The later entry
            # silently replaced the earlier one, so only the effective
            # (compound-object) rule is kept.  Confirm whether the more
            # specific PP rule was also meant to apply.
            '( SBARQ ( WHNP ( WP ) ) ( SQ ( VBZ:action-o ) ( NP ( NP:np ) ( PP:pp ) ) ) )': {
                'np': {
                    '( NP:relation-o )': {}
                },
                'pp': {
                    '( PP ( IN=between ) ( NP:compound_object-o ) )': {}
                }
            },
            '( S ( VP ( VB:action-o ) ( NP ( NP:np ) ( PP:pp ) ) ) )': {
                'np': {
                    '( NP:relation-o )': {}
                },
                'pp': {
                    '( PP ( IN=between ) ( NP ( NN:from_object-o ) ( CC=and ) ( NN:to_object-o ) ) )': {}
                }
            },
            '( NP ( NP ( NN:action-o ) ) ( NP ( NP:np ) ( PP:pp ) ) )': {
                'np': {
                    '( NP:relation-o )': {}
                },
                'pp': {
                    '( PP ( IN=between ) ( NP ( NN:from_object-o ) ( CC=and ) ( NN:to_object-o ) ) )': {}
                }
            },
        }

        rules_protects = {
            '( SBARQ ( WHNP ( WDT ) ( NP:np ) ) ( SQ ( VP ( VBP:relation-o ) ( PP:pp ) ) ) )': {
                'np': {
                    '( NP:from_object-o )': {}
                },
                'pp': {
                    '( PP ( IN=from ) ( NP:to_object-o ) )': {}
                }
            },
            '( SBARQ ( WHNP ( WDT ) ( NP:np ) ) ( SQ ( VP ( VBZ:relation-o ) ( PP:pp ) ) ) )': {
                'np': {
                    '( NP:from_object-o )': {}
                },
                'pp': {
                    '( PP ( IN=from ) ( NP:to_object-o ) )': {}
                }
            },
            '( FRAG ( SBAR ( WHNP ( WDT ) ) ( S ( NP:np ) ( VP ( VBP:relation-o ) ( PP:pp ) ) ) ) )': {
                'np': {
                    '( NP:from_object-o )': {}
                },
                'pp': {
                    '( PP ( IN=from ) ( NP:to_object-o ) )': {}
                }
            },
        }

        return {**rules_outcome, **rules_protects}

    def process_matches(self, relation, from_object=None, to_object=None,
                        compound_object=None):
        """Build the term dictionary from the values captured by a rule.

        Parameters
        ----------
        relation : str
            The matched relation text.
        from_object, to_object : str, optional
            The matched endpoint texts.
        compound_object : str, optional
            Text of the form ``"A and B"``; when given it overrides both
            endpoints, with ``A`` becoming the "to" term and ``B`` the
            "from" term.

        Returns
        -------
        terms : dict
            ``{'from': {'term': ...}, 'to': {'term': ...},
            'relation': {'term': ...}}``
        """
        if compound_object is not None:
            (to_object, from_object) = compound_object.split(' and ')
        return {'from': {'term': from_object},
                'to': {'term': to_object},
                'relation': {'term': relation}}

    def _lookup_entity(self, term):
        """Return the entity mapped to `term` in the fallback database.

        Returns ``None`` when the term has no row in ``ncats_entity_map``.
        """
        conn = sqlite3.connect(self.ENTITY_DB)
        try:
            # `term` comes from the user's question, so it is bound as a
            # parameter and never formatted into the SQL text.
            row = conn.execute(
                'SELECT entity FROM ncats_entity_map WHERE term = ?',
                (term,)).fetchone()
        finally:
            conn.close()
        return row[0] if row else None

    def parse(self, question):
        """Parse a natural-language question.

        Parameters
        ----------
        question : str
            The input question.

        Returns
        -------
        terms : dict
            A dictionary of parsed terms, or an empty dictionary when no
            rule matches the question.
        """
        tree = self.parser.parse(question)
        terms = match_rules(tree, self.rules, self.process_matches)
        if terms is None:
            return {}

        mesh = MeshTools()
        for role in ('from', 'to'):
            slot = terms[role]
            best = mesh.get_best_term_entity(slot['term'])
            slot.update({k: v for k, v in best.items()
                         if k in ('entity', 'bound')})

            # Cheat for proof-of-concept: if automatic entity parsing
            # fails, get the entity from the original definitions in the
            # question files.
            if slot.get('entity') is None:
                entity = self._lookup_entity(slot['term'])
                if entity is not None:
                    slot['entity'] = entity
                    slot['bound'] = True

            # Always expose both keys, with `bound` as a real boolean.
            slot.setdefault('entity', None)
            slot['bound'] = bool(slot.get('bound'))

        return terms
def __init__(self, port=9501):
    """Set up the parser client and the matching rules.

    `port` is forwarded to ``StanfordServerParser`` as a keyword argument;
    the rules come from ``self.get_rules()``.
    """
    server_parser = StanfordServerParser(port=port)
    self.parser = server_parser
    self.rules = self.get_rules()
from collections import OrderedDict import os from lango.parser import StanfordServerParser from lango.matcher import match_rules parser = StanfordServerParser() sents = [ 'Call me an Uber.', 'Get my mother some flowers.', 'Find me a pizza with extra cheese.', 'Give Sam\'s dog a biscuit from Petshop.' ] """ me.call({'item': u'uber'}) my.mother.get({'item': u'flowers'}) me.order({'item': u'pizza', u'with': u'extra cheese'}) sam.dog.give({'item': u'biscuit', u'from': u'petshop'}) """ subj_obj_rules = { 'subj_t': OrderedDict([ # my brother / my mother ('( NP ( PRP$:subject-o=my ) ( NN:relation-o ) )', {}), # Sam's dog ('( NP ( NP ( NNP:subject-o ) ( POS ) ) ( NN:relation-o ) )', {}), # me