Пример #1
0
def main():
    """Interactively parse lines from stdin and pretty-print each parse tree.

    Prompts with ``Enter line: `` until end-of-file is reached, then prints
    ``Bye!`` and exits with status 0.  Runs on both Python 2 and Python 3.
    """
    # Python 2 names the line reader ``raw_input``; Python 3 renamed it to
    # ``input``.  Resolve whichever one exists once, before the loop.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    parser = StanfordServerParser()
    while True:
        try:
            line = read_line("Enter line: ")
            tree = parser.parse(line)
            tree.pretty_print()
        except EOFError:
            # The parenthesised single-argument form prints the same text on
            # Python 2 (print statement) and Python 3 (print function).
            print("Bye!")
            sys.exit(0)
Пример #2
0
def main():
    """Run a read-parse-print loop until the user sends end-of-file.

    Each entered line is parsed with a ``StanfordServerParser`` and the
    resulting tree is pretty-printed.  On EOF the function prints ``Bye!``
    and exits the process with status 0.
    """
    server_parser = StanfordServerParser()
    try:
        # The handler below always exits the process, so guarding the whole
        # loop is equivalent to guarding every iteration separately.
        while True:
            sentence = input("Enter line: ")
            parse_tree = server_parser.parse(sentence)
            parse_tree.pretty_print()
    except EOFError:
        print("Bye!")
        sys.exit(0)
Пример #3
0
class StanfordParserTest(object):
    """Small wrapper that forwards sentences to a ``StanfordServerParser``."""

    def __init__(self, host='localhost', port=9000, properties=None):
        """Create the underlying parser client.

        Args:
            host (str): host passed to ``StanfordServerParser``
            port (int): port passed to ``StanfordServerParser``
            properties (dict): properties passed to ``StanfordServerParser``;
                ``None`` (the default) means an empty dict
        """
        # A literal ``{}`` default is a single dict shared by every call;
        # build a fresh one per instance instead.
        if properties is None:
            properties = {}
        self.parser = StanfordServerParser(host, port, properties)

    def query(self, sentence):
        """Parse ``sentence`` and return whatever the parser returns."""
        return self.parser.parse(sentence)
Пример #4
0
class QueryHandler(object):
    """
    Process a natural language query and return an answer.

    A sentence is parsed with ``StanfordServerParser``, matched against the
    rule sets defined in ``__init__`` via ``match_rules``, and the captured
    subject / property is looked up through a SPARQL client.
    """
    def __init__(self,
                 data_source='online',
                 host='localhost',
                 port=9000,
                 properties=None):
        """
        Initialize query handler.

        Args:
            data_source (str): using 'online' zhishi.me API or 'local' sparql endpoint.
            host (str): host of Stanford CoreNLP service
            port (int): port of Stanford CoreNLP service
            properties (dict): properties for Stanford CoreNLP service;
                ``None`` (the default) means an empty dict

        """
        # A literal ``{}`` default is a single dict shared by every call;
        # build a fresh one per instance instead.
        if properties is None:
            properties = {}

        # Initialize Stanford CoreNLP Parser
        self.parser = StanfordServerParser(host, port, properties)

        # Define rules.
        # query single entity
        self.entity_rules = OrderedDict([
            # Named entity, e.g. "Jay Chou", "Microsoft"
            ('( FRAG ( NR:subject-r ) )', {}),
            # Common noun, e.g. "fruit"
            ('( NP ( NN:subject-r ) )', {}),
            # "Who is Jay Chou", "What is a peach"
            ('( IP ( NP ( PN ) ) ( VP ( VC ) ( NP ( NN/NR:subject-r ) ) ) )',
             {}),
            # "Jay Chou is who", "A peach is what"
            ('( IP ( NP ( NN/NR:subject-r ) ) ( VP ( VC ) ( NP ( PN ) ) ) )',
             {}),
        ])

        # query entity property
        self.entity_property_rules = OrderedDict([
            # "Yao Ming height"
            ('( NP ( NP ( NP/NR:subject-o ) ) ( NP ( NN:property-r ) ) )', {}),
            # "Yao Ming's height"
            ('( NP ( DNP ( NP ( NN/NR:subject-r ) ) ( DEG ) ) ( NP ( NN:property-r ) ) )',
             {}),
            # "What is Mount Everest's elevation" (with possessive particle)
            ('( IP ( NP ( DNP ( NP ( NR:subject-r ) ) ( DEG ) ) ( NP ( NN:property-r ) ) ) ( VP ) )',
             {}),
            # "What is Mount Everest elevation" (without possessive particle)
            ('( IP ( NP ( NP/NR:s_type ) ( NN/NP:p_type ) ) ( VP ( VC ) ( QP/NP:q_type ) ) )',
             {
                 's_type':
                 OrderedDict([
                     ('( NR:subject-r )', {}),
                     ('( NP ( NR:subject-r ) )', {}),
                 ]),
                 'p_type':
                 OrderedDict([
                     ('( NN:property-r )', {}),
                     ('( NP ( NN:property-r ) )', {}),
                 ]),
                 'q_type':
                 OrderedDict([
                     ('( QP ( CD ) )', {}),
                     ('( NP ( PN ) )', {}),
                 ])
             }),
        ])

        # Initialize Knowledge Graph query client
        self.data_source = data_source
        if self.data_source == 'online':
            self.sparql = SparqlOnline()
        else:
            self.local_sparql = SparqlLocal()
            # The query methods below always go through ``self.sparql``, so
            # the local client must be reachable under that name as well;
            # ``local_sparql`` is kept for existing callers.
            self.sparql = self.local_sparql

        # Initialize logger
        self.logger = logging.getLogger(self.__class__.__module__)
        self.logger.setLevel(logging.DEBUG)
        # getLogger returns the same logger object for the same name, so only
        # attach a handler the first time; otherwise each new QueryHandler
        # would add another handler and every message would be repeated.
        if not self.logger.handlers:
            ch = logging.StreamHandler()
            ch.setLevel(logging.DEBUG)
            self.logger.addHandler(ch)

        # Initialize synonyms handler
        self.property_synonyms = PropertySynonyms()

    def _entity_query(self, entity_name):
        """
        Handle entity query.

        In 'local' mode the abstract is fetched with a single call.  Otherwise
        'baidubaike' is tried first and 'zhwiki' is used as a fallback when
        the first result is empty.

        Args:
            entity_name (str): entity name

        Returns:
            text (str): abstract of entity
        """
        if self.data_source == 'local':
            return self.sparql.get_abstract(entity_name)

        baidu_result = self.sparql.get_abstract(entity_name,
                                                baike='baidubaike')
        if baidu_result:
            return baidu_result
        return self.sparql.get_abstract(entity_name, baike='zhwiki')

    def _eneity_property_query(self, entity_name, property_name):
        """
        Handle entity property query.

        In 'local' mode the property is fetched with a single call.  Otherwise
        'baidubaike' is tried first and 'zhwiki' is used as a fallback when
        the first result is empty.

        Args:
            entity_name (str)
            property_name (str)

        Returns:
            text (str): entity property value
        """
        if self.data_source == 'local':
            return self.sparql.get_property(entity_name, property_name)

        # NOTE(review): the synonym lookup result is only logged; the lookups
        # below still use the original property_name.  Confirm what
        # get_synonyms returns before wiring it into the query.
        corrected_property = self.property_synonyms.get_synonyms(
            property_name)
        self.logger.debug('Corrected property:\n%s', corrected_property)

        baidu_result = self.sparql.get_property(entity_name,
                                                property_name,
                                                baike='baidubaike')
        if baidu_result:
            return baidu_result
        return self.sparql.get_property(entity_name,
                                        property_name,
                                        baike='zhwiki')

    def query(self, sentence):
        """
        Answers a query

        The entity rules are tried first; if none matches, the entity
        property rules are tried.

        Args:
            sentence (str): query sentence

        Returns:
            ans(str): answer text, or 'rule not match' when no rule applies
        """
        tree = self.parser.parse(sentence)
        self.logger.debug('Dependence parse tree: \n%s', tree)

        info = match_rules(tree, self.entity_rules)
        if info:
            # entity query
            self.logger.debug('Entity match:\n%s', info)
            return self._entity_query(info['subject'])

        info = match_rules(tree, self.entity_property_rules)
        if info:
            # entity property query
            self.logger.debug('Entity property match:\n%s', info)
            return self._eneity_property_query(info['subject'],
                                               info['property'])

        return 'rule not match'
Пример #5
0
    def __init__(self,
                 data_source='online',
                 host='localhost',
                 port=9000,
                 properties=None):
        """
        Initialize query handler.

        Args:
            data_source (str): using 'online' zhishi.me API or 'local' sparql endpoint.
            host (str): host of Stanford CoreNLP service
            port (int): port of Stanford CoreNLP service
            properties (dict): properties for Stanford CoreNLP service;
                ``None`` (the default) means an empty dict

        """
        # A literal ``{}`` default is a single dict shared by every call;
        # build a fresh one per instance instead.
        if properties is None:
            properties = {}

        # Initialize Stanford CoreNLP Parser
        self.parser = StanfordServerParser(host, port, properties)

        # Define rules.
        # query single entity
        self.entity_rules = OrderedDict([
            # Named entity, e.g. "Jay Chou", "Microsoft"
            ('( FRAG ( NR:subject-r ) )', {}),
            # Common noun, e.g. "fruit"
            ('( NP ( NN:subject-r ) )', {}),
            # "Who is Jay Chou", "What is a peach"
            ('( IP ( NP ( PN ) ) ( VP ( VC ) ( NP ( NN/NR:subject-r ) ) ) )',
             {}),
            # "Jay Chou is who", "A peach is what"
            ('( IP ( NP ( NN/NR:subject-r ) ) ( VP ( VC ) ( NP ( PN ) ) ) )',
             {}),
        ])

        # query entity property
        self.entity_property_rules = OrderedDict([
            # "Yao Ming height"
            ('( NP ( NP ( NP/NR:subject-o ) ) ( NP ( NN:property-r ) ) )', {}),
            # "Yao Ming's height"
            ('( NP ( DNP ( NP ( NN/NR:subject-r ) ) ( DEG ) ) ( NP ( NN:property-r ) ) )',
             {}),
            # "What is Mount Everest's elevation" (with possessive particle)
            ('( IP ( NP ( DNP ( NP ( NR:subject-r ) ) ( DEG ) ) ( NP ( NN:property-r ) ) ) ( VP ) )',
             {}),
            # "What is Mount Everest elevation" (without possessive particle)
            ('( IP ( NP ( NP/NR:s_type ) ( NN/NP:p_type ) ) ( VP ( VC ) ( QP/NP:q_type ) ) )',
             {
                 's_type':
                 OrderedDict([
                     ('( NR:subject-r )', {}),
                     ('( NP ( NR:subject-r ) )', {}),
                 ]),
                 'p_type':
                 OrderedDict([
                     ('( NN:property-r )', {}),
                     ('( NP ( NN:property-r ) )', {}),
                 ]),
                 'q_type':
                 OrderedDict([
                     ('( QP ( CD ) )', {}),
                     ('( NP ( PN ) )', {}),
                 ])
             }),
        ])

        # Initialize Knowledge Graph query client
        self.data_source = data_source
        if self.data_source == 'online':
            self.sparql = SparqlOnline()
        else:
            self.local_sparql = SparqlLocal()
            # Also expose the local client as ``self.sparql`` so that both
            # backends are reachable through one attribute; ``local_sparql``
            # is kept for existing callers.
            self.sparql = self.local_sparql

        # Initialize logger
        self.logger = logging.getLogger(self.__class__.__module__)
        self.logger.setLevel(logging.DEBUG)
        # getLogger returns the same logger object for the same name, so only
        # attach a handler the first time; otherwise each new instance would
        # add another handler and every message would be repeated.
        if not self.logger.handlers:
            ch = logging.StreamHandler()
            ch.setLevel(logging.DEBUG)
            self.logger.addHandler(ch)

        # Initialize synonyms handler
        self.property_synonyms = PropertySynonyms()
Пример #6
0
from collections import OrderedDict
import os
from lango.parser import StanfordServerParser
from lango.matcher import match_rules

# Client for the Stanford CoreNLP server, built with its default arguments.
parser = StanfordServerParser()

# Sentences to run through the rule matcher.
sents = ['What religion is the President of the United States?']

# Nested rule tree: each key is a tree pattern, each value maps a tag captured
# by that pattern to the sub-rules applied to the captured subtree.
rules = {
    '( SBARQ ( WHNP/WHADVP:wh_t ) ( SQ ( VBZ ) ( NP:np_t ) ) )': {
        # Sub-rules for the noun phrase captured as ``np_t``.
        'np_t': {
            '( NP ( NP:subj-o ) ( PP ( IN:subj_in-o ) ( NP:obj-o ) ) )': {},
            '( NP:subj-o )': {},
        },
        # Sub-rules for the wh-phrase captured as ``wh_t``.
        'wh_t': {
            '( WHNP:whnp ( WDT ) ( NN:prop-o ) )': {},
            '( WHNP/WHADVP:qtype-o )': {},
        },
    },
    '( SBARQ:subj-o )': {},
}

# Context keys reported for every match, in output order.
keys = ['subj', 'subj_in', 'obj', 'prop', 'qtype']

for sent in sents:
    tree = parser.parse(sent)
    contexts = match_rules(tree, rules, multi=True)
    for context in contexts:
        # One "key:value" pair per reported key; keys absent from the
        # context are rendered as None by dict.get.
        pairs = ['%s:%s' % (name, context.get(name)) for name in keys]
        print(", ".join(pairs))
"""
Пример #7
0
    def __init__(self, host='localhost', port=9000, properties=None):
        """Set up the parser client, the grammar rule tables and the
        WikiData client.

        Args:
            host (str): host passed to ``StanfordServerParser``
            port (int): port passed to ``StanfordServerParser``
            properties (dict): properties passed to ``StanfordServerParser``;
                ``None`` (the default) means an empty dict
        """
        LoggingInterface.__init__(self)
        # A literal ``{}`` default is a single dict shared by every call;
        # build a fresh one per instance instead.
        if properties is None:
            properties = {}
        self.parser = StanfordServerParser(host, port, properties)

        # Rule for matching a subject and/or property of NP
        # Matches:
        # - subject            : Subject to get property of
        # - prop     (optional): Property to get of subject
        #
        # Examples:
        # - Obama born
        # - Obama
        # - Obama's birthday
        # - Barack Obama's wife
        self.subj_rules = OrderedDict([
            # When was (Obama born)
            ('( NP ( NP:subject-o ) ( VP:prop-o ) )', {}),
            # What is (the birth day of Obama)
            ('( NP ( NP:prop-o ) ( PP ( IN ) ( NP:subject-o ) ) )', {}),
            # What is (Obama's birthday)
            ('( NP ( NP:subject-o ( NNP ) ( POS ) ) ( NN/NNS:prop-o ) $ )', {}
             ),
            # What is (Obama's birth day)
            ('( NP ( NP:subject-o ( NNP ) ( POS ) ) ( NN/JJ:prop-o ) ( NN/NNS:prop2-o ) )',
             {}),
            # What is (Barrack Obama's birthday)
            ('( NP ( NP:subject-o ( NNP ) ( NNP ) ( POS ) ) ( NN/NNS:prop-o ) $ )',
             {}),
            # What is (Barack Obama's birth day)
            ('( NP ( NP:subject-o ( NNP ) ( NNP ) ( POS ) ) ( NN/JJ:prop-o ) ( NN/NNS:prop2-o ) )',
             {}),
            # What was (the Soviet Union’s motto)
            ('( NP ( NP:subject-o ( DT ) ( NNP ) ( NNP ) ( POS ) ) ( NN:prop-o ) )',
             {}),
            ('( NP:subject-o )', {}),
        ])

        # Rule for matching subject property query
        # Matches:
        # - qtype               : Question type (who, where, what, when)
        # - subject             : Subject to get property of
        # - prop      (optional): Property to get of subject
        # - prop2     (optional): Second part of property
        # - prop3     (optional): Overwrite property
        # - jj        (optional): Adjective that will be property (e.g. many/tall/high)
        #
        # Examples:
        # - What religion is Obama?
        # - Who did Obama marry?
        # - Who is Obama?
        # - Who is Barack Obama's wife?
        # - How tall is Mt. Everest?
        self.subject_prop_rules = {
            '( SBARQ ( WHNP/WHADVP/WHADJP:qtype_t ) ( SQ:sq_t ) )': {
                'qtype_t':
                OrderedDict([
                    # What religion
                    ('( WHNP ( WDT:qtype-o=what ) ( NN:prop3-o ) )', {}),
                    # What genre
                    ('( WHNP ( WDT:qtype-o=what ) ( NP ( NN:prop3-o ) ) )', {}
                     ),
                    # How many/tall
                    ('( WHADJP ( WRB:qtype-o ) ( JJ:jj-o ) )', {}),
                    # which country
                    ('( WHNP ( WDT:qtype-o=which ) ( NN ) )', {}),
                    # What/where/who
                    ('( WHNP/WHADVP:qtype-o )', {}),
                ]),
                'sq_t': {
                    # What ethnicity is Obama
                    # (capture name was misspelled "suject", so the subject
                    # was never bound for this pattern)
                    '( SQ ( VP ( ADVP:prop-o ) ) ( VBZ ) ( VP:subject-o ) )':
                    {},
                    # Who did Obama marry
                    '( SQ ( VBD:action-o ) ( NP:subj_t ) ( VP:prop-o ) )': {
                        'subj_t': self.subj_rules
                    },
                    # Who did
                    '( SQ ( VP ( VBZ/VBD/VBP:action-o ) ( NP:subj_t ) ) )': {
                        'subj_t': self.subj_rules
                    },
                    # Who is Edward Thatch known as
                    '( SQ ( VBZ:action-o ) ( NP:subj_t ) ( VP:prop-o ) )': {
                        'subj_t': self.subj_rules,
                    },
                    # Which country is Deshauna Barber a citizen of?
                    '( SQ ( VBZ:action-o ) ( NP:subject-o ( NNP ) ( NNP ) ) ( NP ( NP ( DT ) ( NN:prop-o ) ) ( PP ( IN ) ) ) )':
                    {},
                    # What is Obama
                    '( SQ ( VBZ/VBD/VBP:action-o ) ( NP:subj_t ) )': {
                        'subj_t': self.subj_rules
                    }
                }
            }
        }

        # Rule for getting property of NP or VP
        # Matches:
        # prop : Property of instance to match
        # op   : Operation to match property
        # value: Value of property
        #
        # Examples:
        # - born in 1950
        # - have population over 100,000
        self.prop_rules = OrderedDict([
            #
            ('( SQ/VP ( VB/VBP/VBD ) ( VP ( VBN:prop-o ) ( PP ( IN:op-o ) ( NP:value-o ) ) ) )',
             {}),
            # are in Asia
            ('( SQ/VP ( VB/VBP/VBD=are ) ( PP ( IN:op-o ) ( NP:value-o ) ) )',
             {}),
            # died from laryngitis
            ('( SQ/VP ( VB/VBP/VBD:prop-o ) ( PP ( IN:op-o ) ( NP:value-o ) ) )',
             {}),
            # have population over 1000000
            ('( SQ/VP ( VB/VBP/VBD ) ( NP ( NP:prop-o ) ( PP ( IN:op-o ) ( NP/CD/JJ:value-o ) ) ) )',
             {}),
            ('( SQ/VP ( VB/VBP/VBD ) ( NP:prop-o ) ( NP ( QP ( JJR:op-o ) ( IN ) ( CD:value-o ) ) ) )',
             {}),
            ('( SQ/VP ( VB/VBP/VBD ) ( NP ( QP ( JJR:op-o ) ( IN=than ) ( NP/CD/JJ:value-o ) ) ( NNS:value_units-o ) ) )',
             {}),
            ('( PP ( IN:op-o ) ( NP ( NP:value-o ) ( PP:pp_t ) ) )', {}),
            ('( PP ( IN:op-o ) ( NP:value-o ) )', {}),
        ])

        # Rules for finding entity queries
        # Matches:
        # qtype                   : question type (how many, which)
        # inst                    : instance of entity to match
        # prop_match_t  (optional): Parse tree for first property match
        # prop_match2_t (optional): Parse tree for second property match
        #
        # Examples:
        # - How many POTUS are there?
        # - Which POTUS are born in 1950?
        # - How many books are written by George Orwell?
        # - How many countries are in Asia and have population over 100,000?
        self.find_entity_rules = OrderedDict([
            ('( SBARQ ( WHNP ( WHNP ( WHADJP:qtype-o ) ( NNS:inst-O ) ) ( PP:prop_match_t ) ) )',
             {}),
            ('( SBARQ ( WHNP:qtype-o=who ) ( SQ:sq_t ) )', {
                'sq_t': {
                    '( SQ ( VBD/VBZ ) ( NP ( NP:inst-O ) ( PP:prop_match_t ) ) )':
                    {}
                },
            }),
            (
                '( SBARQ ( WHNP ( WHADJP/WDT/WHNP:qtype-o ) ( NNS/NN/NP:inst-O ) ) ( SQ:sq_t ) )',
                {
                    'sq_t':
                    OrderedDict([
                        # are there
                        ('( SQ ( VBP ) ( NP ( EX=there ) ) )', {}),
                        # are in Asia and have population over 100,000
                        ('( SQ ( VP ( VP:prop_match_t ) ( CC ) ( VP:prop_match2_t ) ) )',
                         {}),
                        ('( SQ ( VP:prop_match_t ) )', {}),
                        ('( SQ:prop_match_t )', {}),
                    ])
                }),
        ])

        self.wd = WikiData()
Пример #8
0
class NLQueryEngine(LoggingInterface):
    """
    Grammar mapping for knowledge queries of the form:
    - What is the X of Y
    - What is X's Y
    """
    def __init__(self, host='localhost', port=9000, properties=None):
        """Set up the parser client, the grammar rule tables and the
        WikiData client.

        Args:
            host (str): host passed to ``StanfordServerParser``
            port (int): port passed to ``StanfordServerParser``
            properties (dict): properties passed to ``StanfordServerParser``;
                ``None`` (the default) means an empty dict
        """
        LoggingInterface.__init__(self)
        # A literal ``{}`` default is a single dict shared by every call;
        # build a fresh one per instance instead.
        if properties is None:
            properties = {}
        self.parser = StanfordServerParser(host, port, properties)

        # Rule for matching a subject and/or property of NP
        # Matches:
        # - subject            : Subject to get property of
        # - prop     (optional): Property to get of subject
        #
        # Examples:
        # - Obama born
        # - Obama
        # - Obama's birthday
        # - Barack Obama's wife
        self.subj_rules = OrderedDict([
            # When was (Obama born)
            ('( NP ( NP:subject-o ) ( VP:prop-o ) )', {}),
            # What is (the birth day of Obama)
            ('( NP ( NP:prop-o ) ( PP ( IN ) ( NP:subject-o ) ) )', {}),
            # What is (Obama's birthday)
            ('( NP ( NP:subject-o ( NNP ) ( POS ) ) ( NN/NNS:prop-o ) $ )', {}
             ),
            # What is (Obama's birth day)
            ('( NP ( NP:subject-o ( NNP ) ( POS ) ) ( NN/JJ:prop-o ) ( NN/NNS:prop2-o ) )',
             {}),
            # What is (Barrack Obama's birthday)
            ('( NP ( NP:subject-o ( NNP ) ( NNP ) ( POS ) ) ( NN/NNS:prop-o ) $ )',
             {}),
            # What is (Barack Obama's birth day)
            ('( NP ( NP:subject-o ( NNP ) ( NNP ) ( POS ) ) ( NN/JJ:prop-o ) ( NN/NNS:prop2-o ) )',
             {}),
            # What was (the Soviet Union’s motto)
            ('( NP ( NP:subject-o ( DT ) ( NNP ) ( NNP ) ( POS ) ) ( NN:prop-o ) )',
             {}),
            ('( NP:subject-o )', {}),
        ])

        # Rule for matching subject property query
        # Matches:
        # - qtype               : Question type (who, where, what, when)
        # - subject             : Subject to get property of
        # - prop      (optional): Property to get of subject
        # - prop2     (optional): Second part of property
        # - prop3     (optional): Overwrite property
        # - jj        (optional): Adjective that will be property (e.g. many/tall/high)
        #
        # Examples:
        # - What religion is Obama?
        # - Who did Obama marry?
        # - Who is Obama?
        # - Who is Barack Obama's wife?
        # - How tall is Mt. Everest?
        self.subject_prop_rules = {
            '( SBARQ ( WHNP/WHADVP/WHADJP:qtype_t ) ( SQ:sq_t ) )': {
                'qtype_t':
                OrderedDict([
                    # What religion
                    ('( WHNP ( WDT:qtype-o=what ) ( NN:prop3-o ) )', {}),
                    # What genre
                    ('( WHNP ( WDT:qtype-o=what ) ( NP ( NN:prop3-o ) ) )', {}
                     ),
                    # How many/tall
                    ('( WHADJP ( WRB:qtype-o ) ( JJ:jj-o ) )', {}),
                    # which country
                    ('( WHNP ( WDT:qtype-o=which ) ( NN ) )', {}),
                    # What/where/who
                    ('( WHNP/WHADVP:qtype-o )', {}),
                ]),
                'sq_t': {
                    # What ethnicity is Obama
                    # (capture name was misspelled "suject", so subject_query
                    # never received its ``subject`` for this pattern)
                    '( SQ ( VP ( ADVP:prop-o ) ) ( VBZ ) ( VP:subject-o ) )':
                    {},
                    # Who did Obama marry
                    '( SQ ( VBD:action-o ) ( NP:subj_t ) ( VP:prop-o ) )': {
                        'subj_t': self.subj_rules
                    },
                    # Who did
                    '( SQ ( VP ( VBZ/VBD/VBP:action-o ) ( NP:subj_t ) ) )': {
                        'subj_t': self.subj_rules
                    },
                    # Who is Edward Thatch known as
                    '( SQ ( VBZ:action-o ) ( NP:subj_t ) ( VP:prop-o ) )': {
                        'subj_t': self.subj_rules,
                    },
                    # Which country is Deshauna Barber a citizen of?
                    '( SQ ( VBZ:action-o ) ( NP:subject-o ( NNP ) ( NNP ) ) ( NP ( NP ( DT ) ( NN:prop-o ) ) ( PP ( IN ) ) ) )':
                    {},
                    # What is Obama
                    '( SQ ( VBZ/VBD/VBP:action-o ) ( NP:subj_t ) )': {
                        'subj_t': self.subj_rules
                    }
                }
            }
        }

        # Rule for getting property of NP or VP
        # Matches:
        # prop : Property of instance to match
        # op   : Operation to match property
        # value: Value of property
        #
        # Examples:
        # - born in 1950
        # - have population over 100,000
        self.prop_rules = OrderedDict([
            #
            ('( SQ/VP ( VB/VBP/VBD ) ( VP ( VBN:prop-o ) ( PP ( IN:op-o ) ( NP:value-o ) ) ) )',
             {}),
            # are in Asia
            ('( SQ/VP ( VB/VBP/VBD=are ) ( PP ( IN:op-o ) ( NP:value-o ) ) )',
             {}),
            # died from laryngitis
            ('( SQ/VP ( VB/VBP/VBD:prop-o ) ( PP ( IN:op-o ) ( NP:value-o ) ) )',
             {}),
            # have population over 1000000
            ('( SQ/VP ( VB/VBP/VBD ) ( NP ( NP:prop-o ) ( PP ( IN:op-o ) ( NP/CD/JJ:value-o ) ) ) )',
             {}),
            ('( SQ/VP ( VB/VBP/VBD ) ( NP:prop-o ) ( NP ( QP ( JJR:op-o ) ( IN ) ( CD:value-o ) ) ) )',
             {}),
            ('( SQ/VP ( VB/VBP/VBD ) ( NP ( QP ( JJR:op-o ) ( IN=than ) ( NP/CD/JJ:value-o ) ) ( NNS:value_units-o ) ) )',
             {}),
            ('( PP ( IN:op-o ) ( NP ( NP:value-o ) ( PP:pp_t ) ) )', {}),
            ('( PP ( IN:op-o ) ( NP:value-o ) )', {}),
        ])

        # Rules for finding entity queries
        # Matches:
        # qtype                   : question type (how many, which)
        # inst                    : instance of entity to match
        # prop_match_t  (optional): Parse tree for first property match
        # prop_match2_t (optional): Parse tree for second property match
        #
        # Examples:
        # - How many POTUS are there?
        # - Which POTUS are born in 1950?
        # - How many books are written by George Orwell?
        # - How many countries are in Asia and have population over 100,000?
        self.find_entity_rules = OrderedDict([
            ('( SBARQ ( WHNP ( WHNP ( WHADJP:qtype-o ) ( NNS:inst-O ) ) ( PP:prop_match_t ) ) )',
             {}),
            ('( SBARQ ( WHNP:qtype-o=who ) ( SQ:sq_t ) )', {
                'sq_t': {
                    '( SQ ( VBD/VBZ ) ( NP ( NP:inst-O ) ( PP:prop_match_t ) ) )':
                    {}
                },
            }),
            (
                '( SBARQ ( WHNP ( WHADJP/WDT/WHNP:qtype-o ) ( NNS/NN/NP:inst-O ) ) ( SQ:sq_t ) )',
                {
                    'sq_t':
                    OrderedDict([
                        # are there
                        ('( SQ ( VBP ) ( NP ( EX=there ) ) )', {}),
                        # are in Asia and have population over 100,000
                        ('( SQ ( VP ( VP:prop_match_t ) ( CC ) ( VP:prop_match2_t ) ) )',
                         {}),
                        ('( SQ ( VP:prop_match_t ) )', {}),
                        ('( SQ:prop_match_t )', {}),
                    ])
                }),
        ])

        self.wd = WikiData()

    def subject_query(self,
                      qtype,
                      subject,
                      action,
                      jj=None,
                      prop=None,
                      prop2=None,
                      prop3=None):
        """Transforms matched context into query parameters and performs query

        Args:
            qtype: Matched type of query (what, who, where, etc.)
            subject: Matched subject (Obama)
            action: Matched verb action (is, was, ran)
            jj (optional): Matched adjective (old, tall, high)
            prop (optional): Matched prop
            prop2 (optional): Matched second part of prop, appended to prop
            prop3 (optional): Matched prop used only when prop is empty

        Returns:
            Answer: Answer from query, or empty Answer if None
        """
        # Trace the raw match through the class logger instead of stdout.
        self.info(
            'Subject query: qtype={0}, subject={1}, action={2}, jj={3}, '
            'prop={4}, prop2={5}, prop3={6}', qtype, subject, action, jj,
            prop, prop2, prop3)

        if jj == 'old':
            # How old is Obama?
            prop = 'age'

        if jj in ['tall', 'high']:
            # How tall is Yao Ming / Eifel tower?
            prop = 'height'

        if prop2:
            # Two-part property ("birth" + "day"); tolerate a missing first
            # part rather than failing on None + str.
            prop = prop + ' ' + prop2 if prop else prop2

        if prop3 and not prop:
            prop = prop3

        # Fall back to the verb itself unless it is a bare copula.
        if not prop and action not in ['is', 'was']:
            prop = action

        # prop is still None for plain "Who is X?" questions, so it must be
        # checked before calling a string method on it.
        if qtype == 'who' and prop and prop.endswith('ed'):
            prop += ' by'

        if qtype == 'where' and prop == 'founded':
            prop = 'location of formation'

        if qtype == 'where' and action == 'is':
            prop = 'located'

        if qtype == 'how' and prop == 'died':
            prop = 'manner of death'

        if qtype == 'where' and prop == 'died':
            prop = 'place of death'

        if prop in ['birth day', 'birthday']:
            prop = 'date of birth'

        ans = self.get_property(qtype, subject, prop)
        if not ans:
            ans = Answer()

        ans.params = {
            'qtype': qtype,
            'subject': subject,
            'prop': prop,
        }

        # Keep only the last entry of the result data for population queries
        # (presumably the most recent figure - TODO confirm).  Skipped when
        # there is no data so an empty Answer is returned instead of raising.
        if prop == "population" and getattr(ans, 'data', None):
            ans.data = ans.data[-1]

        self.info('Query params: {0}', ans.params)
        return ans

    def get_prop_tuple(self,
                       prop=None,
                       value=None,
                       op=None,
                       value_units=None,
                       pp_t=None):
        """Returns a list of property tuples (prop, value, op).
        E.g. [(population, 1000000, >)]

        Args:
            prop (str): Property to search for (e.g. population)
            value (str): Value property should equal (e.g. 10000000)
            op (str): Operator word for value of property (e.g. over)
            value_units (str): Units of the value (e.g. people); used to
                infer prop when prop is not given
            pp_t (Tree): Nested prepositional phrase, matched recursively
                against ``self.prop_rules``

        Returns:
            list: Property tuples, e.g: [(population, 10000000, >)], or None
            when the operator is unknown, the property cannot be inferred
            from value_units, or the nested phrase does not match
        """

        self.info('Prop tuple: {0},{1},{2},{3},{4}', prop, value, op,
                  value_units, pp_t)

        # Relation words pass through unchanged; comparison words are mapped
        # to their operator symbol.
        if op in ['in', 'by', 'of', 'from']:
            oper = op
        elif op in ['over', 'above', 'more', 'greater']:
            oper = '>'
        elif op in ['under', 'below', 'less']:
            oper = '<'
        else:
            self.error('NO OP {0}', op)
            return None

        # Infer property to match value
        if prop is None and value_units is not None:
            if value_units in ['people']:
                prop = 'population'
            if not prop:
                return None

        props = [(prop, value, oper)]

        if pp_t:
            # The nested phrase contributes further tuples via a recursive
            # match with this same method as the callback.
            prop_tuple = match_rules(pp_t, self.prop_rules,
                                     self.get_prop_tuple)
            if not prop_tuple:
                return None
            props += prop_tuple

        return props

    def find_entity_query(self,
                          qtype,
                          inst,
                          prop_match_t=None,
                          prop_match2_t=None):
        """Transforms matched context into query parameters and performs query for
        queries to find entities

        Args:
            qtype (str): Matched type of query (what, who, where, etc.)
            inst (str): Matched instance of entity to match (Obama)
            prop_match_t (Tree): Matched property Tree
            prop_match2_t (Tree): Matched property Tree

        Returns:
            Answer: Answer from query, or empty Answer if the query returned
            nothing; None if a given property tree matches no property rule
        """
        props = []
        for prop_tree in (prop_match_t, prop_match2_t):
            if not prop_tree:
                continue
            prop = match_rules(prop_tree, self.prop_rules,
                               self.get_prop_tuple)
            if not prop:
                # A property tree that cannot be interpreted aborts the query.
                return None
            props += prop

        # All-uppercase instances (e.g. POTUS) are left as they are.
        if not inst.isupper():
            inst = singularize(inst)

        ans = self.wd.find_entity(qtype, inst, props)
        if not ans:
            ans = Answer()

        ans.params = {
            'qtype': qtype,
            'inst': inst,
            'props': props,
        }
        return ans

    def get_property(self, qtype, subject, prop):
        """Gets property of a subject
        Example:
            get_property('who', 'Obama', 'wife') = 'Michelle Obama'

        Args:
            qtype: Type of query (what, who, where, etc.)
            subject: Subject to get property of
            prop: Property to get of subject

        Todo:
            * Add other APIs here

        Returns:
            Answer: Answer from query
        """
        return self.wd.get_property(qtype, subject, prop)

    def preprocess(self, sent):
        """Preprocesses a query by adding punctuation

        Appends a '?' unless the sentence already ends with one.  An empty
        sentence becomes '?' instead of raising IndexError.
        """
        if not sent.endswith('?'):
            sent = sent + '?'
        return sent

    def query(self, sent, format_='plain'):
        """Answers a query

        If format is plain, will return the answer as a string
        If format is raw, will return the raw context of query

        Args:
            sent: Query sentence
            format_: Format of answer to return (Default to plain)

        Returns:
            dict: Answer context
            str: Answer as a string

        Raises:
            ValueError: If format_ is incorrect
        """
        sent = self.preprocess(sent)
        tree = self.parser.parse(sent)
        self.info(tree)
        # NOTE(review): both matchers run before ``first`` picks a result, so
        # both queries are always executed.
        ans = first([
            match_rules(tree, self.find_entity_rules, self.find_entity_query),
            match_rules(tree, self.subject_prop_rules, self.subject_query),
        ])

        if not ans:
            ans = Answer()

        ans.query = sent
        ans.tree = str(tree)

        if format_ == 'raw':
            return ans.to_dict()
        elif format_ == 'plain':
            return ans.to_plain()
        else:
            raise ValueError('Undefined format: %s' % format_)
Пример #9
0
 def __init__(self, host='localhost', port=9000, properties=None):
     """Create the ``StanfordServerParser`` client used by this object.

     ``properties`` defaults to ``None`` and is replaced by a fresh empty
     dict, so instances never share one default dict.
     """
     if properties is None:
         properties = {}
     self.parser = StanfordServerParser(host, port, properties)
Пример #10
0
class QueryParser:
    """Parse an English-language question.

    ``QueryParser`` uses part-of-speech tagging and pattern matching to
    turn a natural-language question into a structured query.

    Parameters
    ----------

    port : int, optional
       The port on which the Stanford CoreNLP Server listens. [default: 9501]
    db_path : str, optional
       Path of the SQLite file that holds the ``ncats_entity_map`` table
       used as a fallback entity lookup. [default: 'data/reasoner_data.sqlite']

    """

    # Class-level default so instances created without __init__ still work.
    db_path = 'data/reasoner_data.sqlite'

    def __init__(self, port=9501, db_path=None):
        self.parser = StanfordServerParser(port=port)
        if db_path is not None:
            self.db_path = db_path
        self.rules = self.get_rules()

    def get_rules(self):
        """Build the parse-tree templates used to recognise questions.

        Returns
        -------
        rules : dict
            Maps each top-level template to a dict of sub-rules, keyed by the
            name of a captured subtree. The "outcome" group comes first,
            followed by the "protects" group, in declaration order.

        """
        rules_outcome = {
            '( SBARQ ( WHNP ( WDT ) ( NP:np ) ) ( SQ ( VP ( VBZ:action-o ) ( PP:pp1 ) ( PP:pp2 ) ) ) )': {
                'np': {
                    '( NP:relation-o )': {},
                },
                'pp1': {
                    '( PP ( TO=to ) ( NP:to_object-o ) )': {},
                    '( PP ( IN=from ) ( NP:from_object-o ) )': {},
                },
                'pp2': {
                    '( PP ( TO=to ) ( NP:to_object-o ) )': {},
                    '( PP ( IN=from ) ( NP:from_object-o ) )': {},
                },
            },

            '( SBARQ ( WHNP ( WDT ) ( NP:np1 ) ) ( SQ ( VP ( VBZ:action-o ) ( NP:np2 ) ( PP:pp ) ) ) )': {
                'np1': {
                    '( NP:relation-o )': {},
                },
                'np2': {
                    '( NP:from_object-o )': {},
                },
                'pp': {
                    '( PP ( TO=to ) ( NP:to_object-o ) )': {},
                },
            },

            '( SBAR ( WHNP ( WDT ) ) ( S ( NP:np1 ) ( VP ( VBZ:action-o ) ( NP:np2 ) ( PP:pp ) ) ) )': {
                'np1': {
                    '( NP:relation-o )': {},
                },
                'np2': {
                    '( NP:from_object-o )': {},
                },
                'pp': {
                    '( PP ( TO=to ) ( NP ( NN:to_object-o ) ( NN ) ) )': {},
                },
            },

            # NOTE: this template used to be declared twice in this literal.
            # A repeated key keeps its first position but takes the last
            # value, so only the compound-object variant below was ever
            # active. The shadowed variant captured the two nouns separately:
            #   ( PP ( IN=between ) ( NP ( NN:from_object-o ) ( CC=and ) ( NN:to_object-o ) ) )
            # Add it as a second entry under 'pp' if that capture is wanted.
            '( SBARQ ( WHNP ( WP ) ) ( SQ ( VBZ:action-o ) ( NP ( NP:np ) ( PP:pp ) ) ) )': {
                'np': {
                    '( NP:relation-o )': {},
                },
                'pp': {
                    '( PP ( IN=between ) ( NP:compound_object-o ) )': {},
                },
            },

            '( S ( VP ( VB:action-o ) ( NP ( NP:np ) ( PP:pp ) ) ) )': {
                'np': {
                    '( NP:relation-o )': {},
                },
                'pp': {
                    '( PP ( IN=between ) ( NP ( NN:from_object-o ) ( CC=and ) ( NN:to_object-o ) ) )': {},
                },
            },

            '( NP ( NP ( NN:action-o ) ) ( NP ( NP:np ) ( PP:pp ) ) )': {
                'np': {
                    '( NP:relation-o )': {},
                },
                'pp': {
                    '( PP ( IN=between ) ( NP ( NN:from_object-o ) ( CC=and ) ( NN:to_object-o ) ) )': {},
                },
            },
        }

        rules_protects = {
            '( SBARQ ( WHNP ( WDT ) ( NP:np ) ) ( SQ ( VP ( VBP:relation-o ) ( PP:pp ) ) ) )': {
                'np': {
                    '( NP:from_object-o )': {},
                },
                'pp': {
                    '( PP ( IN=from ) ( NP:to_object-o ) )': {},
                },
            },

            '( SBARQ ( WHNP ( WDT ) ( NP:np ) ) ( SQ ( VP ( VBZ:relation-o ) ( PP:pp ) ) ) )': {
                'np': {
                    '( NP:from_object-o )': {},
                },
                'pp': {
                    '( PP ( IN=from ) ( NP:to_object-o ) )': {},
                },
            },

            '( FRAG ( SBAR ( WHNP ( WDT ) ) ( S ( NP:np ) ( VP ( VBP:relation-o ) ( PP:pp ) ) ) ) )': {
                'np': {
                    '( NP:from_object-o )': {},
                },
                'pp': {
                    '( PP ( IN=from ) ( NP:to_object-o ) )': {},
                },
            },
        }

        return {**rules_outcome, **rules_protects}

    def process_matches(self, relation, from_object=None, to_object=None, compound_object=None):
        """Turn captured rule values into the term dictionary.

        The parameter names mirror the capture tags used in ``get_rules``
        (``relation``, ``from_object``, ``to_object``, ``compound_object``).

        Parameters
        ----------
        relation : str
            Captured relation text.
        from_object, to_object : str, optional
            Captured endpoints of the relation.
        compound_object : str, optional
            Text of the form ``'<a> and <b>'``. When given it overrides the
            two endpoints: ``<a>`` becomes the 'to' term and ``<b>`` the
            'from' term.

        Returns
        -------
        terms : dict
            ``{'from': {'term': ...}, 'to': {'term': ...}, 'relation': {'term': ...}}``

        Raises
        ------
        ValueError
            If ``compound_object`` does not split into exactly two parts
            around ``' and '``.

        """
        if compound_object is not None:
            (to_object, from_object) = compound_object.split(' and ')
        return {
            'from': {'term': from_object},
            'to': {'term': to_object},
            'relation': {'term': relation},
        }

    def _fetch_entity_row(self, term):
        """Return the first ``ncats_entity_map`` row for ``term``, or None.

        The row is a 1-tuple ``(entity,)``.
        """
        conn = sqlite3.connect(self.db_path)
        try:
            # A bound parameter keeps quotes in the term from breaking the
            # statement and stops a term such as 'term' from being read as a
            # column name by SQLite's double-quoted-identifier rule.
            cursor = conn.execute(
                'SELECT entity FROM ncats_entity_map WHERE term = ?', (term,))
            return cursor.fetchone()
        finally:
            conn.close()

    def parse(self, question):
        """Parse a natural-language question.

        Parameters
        ----------
        question : str
            The input question.

        Returns
        -------
        terms : dict
            A dictionary of parsed terms with the keys 'from', 'to' and
            'relation'; empty when no rule matches the question. The 'from'
            and 'to' entries are extended with the 'entity' and 'bound'
            values reported by ``MeshTools.get_best_term_entity``.

        """
        tree = self.parser.parse(question)
        terms = match_rules(tree, self.rules, self.process_matches)
        if terms is None:
            return {}

        roles = ('from', 'to')

        mesh = MeshTools()
        for role in roles:
            slot = terms[role]
            best = mesh.get_best_term_entity(slot['term'])
            slot.update({k: v for k, v in best.items() if k in ('entity', 'bound')})

        ## cheat for proof-of-concept
        ## if automatic entity parsing fails, get the entities from the original definitions in the question files
        for role in roles:
            slot = terms[role]
            if slot['entity'] is None:
                row = self._fetch_entity_row(slot['term'])
                if row:
                    slot['entity'] = row[0]
                    slot['bound'] = True
            slot['bound'] = bool(slot['bound'])

        return terms
Пример #11
0
 def __init__(self, port=9501):
     """Create the server parser and load the matching rules.

     ``port`` is forwarded to ``StanfordServerParser``; the rules come from
     ``self.get_rules()`` and are stored on ``self.rules``.
     """
     # The parser is attached before get_rules() runs, as in the original order.
     server_parser = StanfordServerParser(port=port)
     self.parser = server_parser
     self.rules = self.get_rules()
Пример #12
0
from collections import OrderedDict
import os
from lango.parser import StanfordServerParser
from lango.matcher import match_rules



# Parser instance created with the default constructor arguments.
parser = StanfordServerParser()

# Example imperative sentences.
sents = [
    "Call me an Uber.",
    "Get my mother some flowers.",
    "Find me a pizza with extra cheese.",
    "Give Sam's dog a biscuit from Petshop.",
]

# Presumably the calls the sentences above should translate into, one per
# sentence in the same order (TODO confirm):
#
# me.call({'item': u'uber'})
# my.mother.get({'item': u'flowers'})
# me.order({'item': u'pizza', u'with': u'extra cheese'})
# sam.dog.give({'item': u'biscuit', u'from': u'petshop'})

subj_obj_rules = {
    'subj_t': OrderedDict([
        # my brother / my mother
        ('( NP ( PRP$:subject-o=my ) ( NN:relation-o ) )', {}),
        # Sam's dog
        ('( NP ( NP ( NNP:subject-o ) ( POS ) ) ( NN:relation-o ) )', {}),
        # me