Exemplo n.º 1
0
class EagerArithmeticDomain(Domain):
    """Arithmetic domain whose operator rules carry Python callables.

    Examples are the ones provided by ``ArithmeticDomain``, each passed
    through ``convert_example``.  ``execute`` returns the semantics it is
    given unchanged.
    """

    # Numeral rules are shared with ArithmeticDomain.
    numeral_rules = ArithmeticDomain.numeral_rules

    # Binary operators are curried: the first call receives one operand and
    # returns a function awaiting the other.
    operator_rules = [
        Rule('$BinOp', 'plus', lambda left: (lambda right: left + right)),
        Rule('$BinOp', 'minus', lambda left: (lambda right: left - right)),
        Rule('$BinOp', 'times', lambda left: (lambda right: left * right)),
        Rule('$UnOp', 'minus', lambda operand: -1 * operand),
    ]

    # Each compositional rule applies the semantics of one child to the
    # semantics of the other.
    compositional_rules = [
        Rule('$E', '$EBO $E', lambda sems: sems[0](sems[1])),
        Rule('$EBO', '$E $BinOp', lambda sems: sems[1](sems[0])),
        Rule('$E', '$UnOp $E', lambda sems: sems[0](sems[1])),
    ]

    def train_examples(self):
        """Return ArithmeticDomain's training examples after conversion."""
        originals = ArithmeticDomain().train_examples()
        return list(map(convert_example, originals))

    def test_examples(self):
        """Return ArithmeticDomain's test examples after conversion."""
        originals = ArithmeticDomain().test_examples()
        return list(map(convert_example, originals))

    def dev_examples(self):
        """Return ArithmeticDomain's dev examples after conversion."""
        originals = ArithmeticDomain().dev_examples()
        return list(map(convert_example, originals))

    def rules(self):
        """Return numeral, operator and compositional rules, in that order."""
        lexical = self.numeral_rules + self.operator_rules
        return lexical + self.compositional_rules

    def grammar(self):
        """Build a Grammar over ``rules()`` with start symbol ``$E``."""
        all_rules = self.rules()
        return Grammar(rules=all_rules, start_symbol='$E')

    def execute(self, semantics):
        """Return ``semantics`` as-is; no separate evaluation step is done."""
        return semantics

    def training_metric(self):
        """Return the metric used for training (denotation accuracy)."""
        return DenotationAccuracyMetric()
Exemplo n.º 2
0
def cartesian_product_of_lexical_rules(rules, restrict_by_lhs=True):
    """
    Expands the given collection of rules by iterating through all possible
    pairs of existing lexical rules and adding a new rule which combines the RHS
    of the first rule with the semantics of the second.  If restrict_by_lhs is
    true, we only consider pairs which have the same LHS, which helps to avoid
    constructing malformed semantics.

    Args:
        rules: iterable of rules; it is traversed exactly once, so a
            generator is acceptable.
        restrict_by_lhs: when true, only rules sharing an LHS are paired.

    Returns:
        A new list holding the non-lexical rules (in input order) followed by
        one rule per (lexical rule, distinct semantics) pair.  Distinct
        semantics are kept in first-seen order, so the result is the same on
        every run.
    """
    from collections import defaultdict
    from itertools import product
    from parsing import Rule, is_lexical

    def unique_in_order(items):
        # Drop duplicates while keeping first-seen order.  Hashable values go
        # through a set; unhashable ones (e.g. dict semantics) fall back to an
        # equality scan instead of raising TypeError.
        seen_hashable = set()
        seen_unhashable = []
        unique = []
        for item in items:
            try:
                if item in seen_hashable:
                    continue
                seen_hashable.add(item)
            except TypeError:
                if item in seen_unhashable:
                    continue
                seen_unhashable.append(item)
            unique.append(item)
        return unique

    # Split the input in a single pass.
    lexical_rules = []
    expanded_rules = []
    for rule in rules:
        if is_lexical(rule):
            lexical_rules.append(rule)
        else:
            expanded_rules.append(rule)

    # Partition rules by lhs (or into one shared bucket when unrestricted).
    lexical_rules_by_lhs = defaultdict(list)
    for rule in lexical_rules:
        lhs = rule.lhs if restrict_by_lhs else 'dummy'
        lexical_rules_by_lhs[lhs].append(rule)

    # In each partition, iterate through Cartesian product of lexical rules.
    for partition in lexical_rules_by_lhs.values():
        sems = unique_in_order(rule.sem for rule in partition)
        for rule, sem in product(partition, sems):
            expanded_rules.append(Rule(rule.lhs, rule.rhs, sem))
    return expanded_rules
def load_rules():
    """Build and return the list of grammar rules.

    The rules cover number words (taken from the module-level ``NUMBERS``
    sequence), arithmetic operators, comparisons, constraints and variable
    references, plus one ``$Junk`` rule per line of ``vocab.txt``.

    Semantics produced by the rules are numbers, variable names of the form
    ``'vN'`` (see ``varname``), operator strings, and tuples combining them.

    Returns:
        list: the ``Rule`` objects, in the order they are defined below.

    Raises:
        IOError/OSError: if ``vocab.txt`` cannot be opened in the current
            working directory.
    """
    rules = []

    def push_list(head, tail):
        """Return the two-element list ``[head, tail]``."""
        return [head, tail]

    def varname(i):
        """Return the variable name for index ``i`` (e.g. ``'v0'``)."""
        return "v%s" % i

    def to_int(sem):
        """Best-effort conversion of a semantics value to an int.

        Tuples are reduced to their first element; anything that cannot be
        converted yields 1.
        """
        if isinstance(sem, tuple):
            return to_int(sem[0])
        try:
            return int(sem)
        except (ValueError, TypeError):
            return 1

    for i, w in enumerate(NUMBERS):
        rules.append(Rule('$Num', str(i), i))
        rules.append(Rule('$Num', "- %s" % i, -i))
        rules.append(Rule('$Num', w, i))
        rules.append(Rule('$Num', "negative %s" % w, -i))
        # Also accept the spelled-out number without hyphens / without 'and'.
        if '-' in w:
            rules.append(Rule('$Num', w.replace('-', ' '), i))
        if ' and ' in w:
            rules.append(Rule('$Num', w.replace(' and ', ' '), i))

    rules.extend([
        # Odd type of problem: 'four plus four' -> x = 4 + 4
        Rule('$E', '$Expr', lambda sems: ('=', sems[0], varname(0))),

        # Usual types of problem structure
        Rule('$E', '?$Command $ConstraintList ?$Command',
             lambda sems: sems[1]),
        Rule('$ConstraintList', '$Constraint ?$EOL', lambda sems: sems[0]),
        Rule('$ConstraintList', '$Constraint ?$EOL ?$Joiner $ConstraintList',
             lambda sems: push_list(sems[0], sems[3])),
        Rule('$Joiner', 'and'),

        # Generic constraint
        Rule('$Constraint', '$EBO $Expr', lambda sems:
             (sems[0][0], sems[0][1], sems[1])),
        Rule('$EBO', '$Expr $Compare', lambda sems: (sems[1], sems[0])),
        Rule('$EOL', '.'),
        Rule('$EOL', ','),
        Rule('$EOL', '?'),
        Rule('$Comma', ','),

        # Constraints with leading or trailing Junk
        Rule('$JunkList', '$Junk ?$JunkList'),
        Rule('$Constraint', '$Find $Constraint', lambda sems: sems[1]),
        Rule('$Constraint', '$Find $JunkList $If $Constraint',
             lambda sems: sems[3]),
        Rule('$Constraint', '$If $Constraint ?$EOL $Find $JunkList',
             lambda sems: sems[1]),
        Rule('$If', 'if'),
        Rule('$If', 'such that'),
        Rule('$Find', 'find'),
        Rule('$Find', 'what'),

        # Pre or postfix command sentence.
        # TODO: extract a semantic meaning like ('find smallest') or ('find all')
        Rule('$Command', '$Find $JunkList ?$EOL'),
        Rule('$Command', '$What $WordIs $JunkList ?$EOL'),
        Rule('$Command', '$I $Have $JunkList ?$EOL'),
        Rule('$Command', '$Given $JunkList ?$EOL'),
        Rule('$What', 'what'),
        Rule('$WordIs', 'is'),
        Rule('$WordIs', 'are'),
        Rule('$Have', 'have'),
        Rule('$I', 'i'),
        Rule('$Given', 'given'),
    ])

    # Complex constraint: 'When x is added to y the result is z'
    rules.extend([
        Rule('$Constraint',
             '$Occasion $Expr $OccasionOpRtoL $Expr ?$EOL $ResultsIn $Expr',
             lambda sems: ('=', (sems[2], sems[1], sems[3]), sems[6])),
        Rule('$Occasion', 'when'),
        Rule('$Occasion', 'if'),
        Rule('$OccasionOpRtoL', 'is added to', '+'),
        Rule('$OccasionOpRtoL', 'is multiplied by', '*'),
        Rule('$OccasionOpRtoL', 'is divided by', '/'),
        # 'x is subtracted from y' means y - x, hence the swapped operands.
        Rule('$Constraint',
             '$Occasion $Expr $OccasionOpLtoR $Expr ?$EOL $ResultsIn $Expr',
             lambda sems: ('=', (sems[2], sems[3], sems[1]), sems[6])),
        Rule('$OccasionOpLtoR', 'is subtracted from', '-'),
        Rule('$ResultsIn', 'the result is'),
    ])

    # Non-standard constraint OperateAndEquality
    rules.extend([
        Rule('$Constraint', '?$Question $ExprList $OperatorAndEquality $Expr',
             lambda sems: ('=', (sems[2], sems[1]), sems[3])),
        Rule('$OperatorAndEquality', 'total to', '+'),
        Rule('$OperatorAndEquality', 'sum to', '+'),
        Rule('$OperatorAndEquality', 'total', '+'),
        Rule('$OperatorAndEquality', 'sum', '+'),
        Rule('$OperatorAndEquality', 'add up to', '+'),
        Rule('$OperatorAndEquality', 'have a sum of', '+'),
        Rule('$OperatorAndEquality', 'have a total of', '+'),
        Rule('$OperatorAndEquality', 'have a difference of', '-'),
        Rule('$OperatorAndEquality', 'have the sum of', '+'),
        Rule('$OperatorAndEquality', 'have the total of', '+'),
        Rule('$OperatorAndEquality', 'have the difference of', '-'),
        Rule('$OperatorAndEquality', 'differ by', '-'),
        Rule('$Question', 'which'),
        Rule('$Question', 'what'),
    ])

    # PreOperator
    rules.append(
        Rule('$Expr', '$PreOperator $ExprList', lambda sems:
             (sems[0], sems[1])))
    rules.append(
        Rule('$Expr', '$PreUnaryOperator $Expr', lambda sems:
             (sems[0], sems[1])))
    for prefix in ['', 'the ']:
        rules.extend([
            Rule('$PreOperator', prefix + 'sum of', '+'),
            Rule('$PreOperator', prefix + 'product of', '*'),
            Rule('$PreOperator', prefix + 'quotient of', '/'),
            Rule('$PreUnaryOperator', prefix + 'square root of', '^(1/2)'),
            Rule('$PreUnaryOperator', prefix + 'square of', '^2'),
            Rule('$PreUnaryOperator', prefix + 'cube of', '^3'),
        ])

    # Operators whose operand list is reversed before being attached.
    rules.append(
        Rule('$Expr', '$RevPreOperator $ExprList', lambda sems:
             (sems[0], tuple(reversed(sems[1])))))
    for prefix in ['', 'the ']:
        rules.extend([
            Rule('$RevPreOperator', prefix + 'difference of', '-'),
            Rule('$RevPreOperator', prefix + 'difference between', '-'),
        ])

    rules.append(
        Rule('$Expr', '$Multiplier $Expr', lambda sems: ('*',
                                                         (sems[0], sems[1]))))

    rules.extend([
        Rule('$Multiplier', 'twice', 2),
        Rule('$Multiplier', 'triple', 3),
        Rule('$Multiplier', 'quadruple', 4),
        Rule('$Multiplier', 'half', 1. / 2),

        # two times the first plus 'a fourth the second'
        Rule('$Multiplier', '?$A $Fraction ?$Of', lambda sems: sems[1]),
        # two times the first plus ' fourth of the second'
        Rule('$Multiplier', '$Expr $Of', lambda sems: sems[0]),
        # two times the first plus '3/4 of the second'
        Rule('$Multiplier', '$Num $Div $Num',
             lambda sems: 1. * sems[0] / sems[2]),
        Rule('$Of', 'of'),
        Rule('$A', 'one'),
        Rule('$A', 'a'),
        Rule('$Div', '/')
    ])

    for prefix in ['', 'one-']:
        rules.extend([
            Rule('$Fraction', prefix + 'fifth', 1. / 5),
            Rule('$Fraction', prefix + 'fourth', 1. / 4),
            Rule('$Fraction', prefix + 'third', 1. / 3),
            Rule('$Fraction', prefix + 'half', 1. / 2),
        ])

    def consecutive_integers(n, is_even, mult=None):
        """Return ``count`` expressions of the form ``'<mult>*k+<offset>'``.

        NOTE(review): no rule in this function references this helper.

        n -> number of integers (an int-like value or an entry of NUMBERS)
        is_even -> (True, False, None) == (even, odd, consec)
        mult -> step multiplier; defaults to 2 for even/odd, 1 otherwise
        """
        try:
            count = int(n)
        except (ValueError, TypeError):
            try:
                count = NUMBERS.index(n)
            except ValueError:
                # n is not a known number word.
                count = 2  # TODO: not this number
        start = -1 if is_even == False else 0
        if mult is None:
            mult = 2 if is_even in (True, False) else 1
        return tuple('%s*k+%s' % (mult, mult * i + start)
                     for i in range(count))

    rules.extend([
        # ExprList
        Rule('$ExprList', '$Expr $And $Expr', lambda sems: (sems[0], sems[2])),
        Rule('$And', 'and'),
        # The same phrase is offered both as a pair and as a triple of
        # variables.
        Rule('$ExprList', '$The ?$SetDescriptor ?$Integers',
             tuple(varname(i) for i in [0, 1])),
        Rule('$ExprList', '?$The ?$SetDescriptor $Two ?$Integers',
             tuple(varname(i) for i in [0, 1])),
        Rule('$ExprList', '$The ?$SetDescriptor ?$Integers',
             tuple(varname(i) for i in [0, 1, 2])),
        Rule('$ExprList', '?$The ?$SetDescriptor $Three ?$Integers',
             tuple(varname(i) for i in [0, 1, 2])),
        Rule('$The', 'the'),
        Rule('$Two', '2'),
        Rule('$Two', 'two'),
        Rule('$Three', '3'),
        Rule('$Three', 'three'),
        Rule('$SetDescriptor', 'same'),
        Rule('$SetDescriptor', 'all'),
        Rule('$ExprList', '?$The $EndDescriptor $Two $Integers',
             lambda sems: tuple(varname(i * sems[1]) for i in [0, 1])),
        Rule('$EndDescriptor', 'larger', -1),
        Rule('$EndDescriptor', 'largest', -1),
        Rule('$EndDescriptor', 'smaller', 0),
        Rule('$EndDescriptor', 'smallest', 0),

        # # Is this crazy?! Probably!
        # Rule('$ExprList', 'its digits',
        #     (('%', ('/', varname(0), 10), 10), ('%', varname(0), 10))),
        # Rule('$ExprList', 'the digits of a two-digit number',
        #     (('%', ('/', varname(0), 10), 10), ('%', varname(0), 10))),
        # Rule('$ExprList', 'the digits of a 2-digit number',
        #     (('%', ('/', varname(0), 10), 10), ('%', varname(0), 10))),
        # Rule('$ExprList', 'the digits',
        #     (('%', ('/', varname(0), 10), 10), ('%', varname(0), 10))),
        # Mapping operators apply one operator to every item of a list.
        Rule('$ExprList', '$ExprList $PostMappingOperator', lambda sems: tuple(
            (sems[1], item) for item in sems[0])),
        Rule('$PostMappingOperator', 'whose squares', '^2'),
        Rule('$ExprList', '$PreMappingOperator $ExprList', lambda sems: tuple(
            (sems[0], item) for item in sems[1])),
        Rule('$PreMappingOperator', 'the squares of', '^2'),
        Rule('$PreMappingOperator', 'the roots of', '^(.5)'),
        Rule('$PreMappingOperator', 'the reciprocals of', '^(-1)'),
        # 'N consecutive ... integers' -> one variable per integer.
        Rule(
            '$ExprList',
            '$Expr ?$Sign $Consecutive ?$Sign ?$Even ?$Sign $Integers ?$Parenthetical',
            lambda sems: tuple(varname(i) for i in range(to_int(sems[0])))),
        Rule('$Consecutive', 'consecutive'),
        Rule('$Even', 'even', True),
        Rule('$Even', 'odd', False),
        Rule('$Integers', 'integers'),
        Rule('$Integers', 'numbers'),
        Rule('$Sign', 'positive'),
        Rule('$Sign', 'negative'),
        Rule('$ExprList', '$Num $Consecutive $Multiples $Of $Num',
             lambda sems: tuple(varname(i) for i in range(sems[0]))),
        Rule('$Multiples', 'multiples'),
        Rule('$Parenthetical', '$Expr $Comma $Expr ?$Comma $And $Expr'),

        # MidOperator
        Rule('$Expr', '$Expr ?$Comma $MidOperator $Expr ?$Comma', lambda sems:
             (sems[2], sems[0], sems[3])),
    ])

    rules.extend([
        # Word
        Rule('$MidOperator', 'plus', '+'),
        Rule('$MidOperator', 'minus', '-'),
        Rule('$MidOperator', 'times', '*'),
        Rule('$MidOperator', 'time', '*'),
        Rule('$MidOperator', 'modulo', '%'),
    ])

    for prefix in ['', 'when ']:
        rules.extend([
            Rule('$MidOperator', prefix + 'added to', '+'),
            Rule('$MidOperator', prefix + 'multiplied by', '*'),
            Rule('$MidOperator', prefix + 'divided by', '/'),
            Rule('$MidOperator', prefix + 'decreased by', '-'),
        ])

    rules.extend([
        # Literal
        Rule('$MidOperator', '+', '+'),
        Rule('$MidOperator', '-', '-'),
        Rule('$MidOperator', '*', '*'),
        Rule('$MidOperator', '/', '/'),
        Rule('$MidOperator', '%', '%'),
        # Complex structure
        Rule('$MidOperator', 'more than', '+'),
    ])

    # 'x less than y' means y - x, hence the swapped operands.
    rules.extend([
        Rule('$Expr', '$Expr ?$Comma $RevMidOperator $Expr ?$Comma',
             lambda sems: (sems[2], sems[3], sems[0])),
        Rule('$RevMidOperator', 'less than', '-'),
    ])

    rules.extend([
        # Comparisons
        Rule('$Compare', 'is', '='),
        Rule('$Compare', 'equals', '='),
        Rule('$Compare', '=', '='),
        Rule('$Compare', 'is equal to', '='),
        Rule('$Compare', 'is less than', '<'),
        Rule('$Compare', 'is less than or equal to', '<='),
        Rule('$Compare', 'is greater than', '>'),
        Rule('$Compare', 'is greater than or equal to', '>='),

        # SplitComparison
        # Type a. X exceeds Y by Z
        Rule('$Constraint', '$Expr $SplitComparison $Expr $By $Expr',
             lambda sems: ('=', (sems[0], (sems[1], sems[2], sems[4])))),
        # Type b: X is Z more than Y
        Rule('$Constraint', '$Expr $Is $Expr $SplitComparison $Expr',
             lambda sems: ('=', (sems[0], (sems[3], sems[4], sems[2])))),
        Rule('$SplitComparison', 'exceeds', '+'),
        Rule('$SplitComparison', 'is greater than', '+'),
        Rule('$SplitComparison', 'is less than', '-'),
        Rule('$SplitComparison', 'more than', '+'),
        Rule('$SplitComparison', 'less than', '-'),
        Rule('$By', 'by'),
        Rule('$Is', 'is'),
    ])

    rules.extend([
        # Properties
        Rule('$Expr', 'its square', ('^2', varname(0))),
        # Parenthesized like 'square root of' so the exponent is 1/2.
        Rule('$Expr', 'its root', ('^(1/2)', varname(0))),

        # These examples make me uncomfortable a little.
        # Find two consecutive ints which add to 4 and 'whose product is X'
        # Can we fix coref?
        Rule('$Expr', '$Group $GroupOp', lambda sems: (sems[1], sems[0])),
        Rule('$Group', 'their_2', tuple(varname(i) for i in [0, 1])),
        Rule('$Group', 'their_3', tuple(varname(i) for i in [0, 1, 2])),
        Rule('$Group', 'whose_2', tuple(varname(i) for i in [0, 1])),
        Rule('$Group', 'whose_3', tuple(varname(i) for i in [0, 1, 2])),
        Rule('$Group', 'the_2', tuple(varname(i) for i in [0, 1])),
        Rule('$Group', 'the_3', tuple(varname(i) for i in [0, 1, 2])),
        Rule('$GroupOp', 'sum', '+'),
        Rule('$GroupOp', 'sums', '+'),
        Rule('$GroupOp', 'difference', '-'),
        Rule('$GroupOp', 'differences', '-'),
        Rule('$GroupOp', 'product', '*'),
        Rule('$GroupOp', 'products', '*'),

        # This one feels safe: 'two consecutive ints whose sum is 7'
        Rule('$Expr', '$ExprList $Group $GroupOp', lambda sems:
             (sems[2], sems[0])),
    ])

    rules.extend([
        # Numbers and Variables
        Rule('$Expr', '$Num', lambda sems: (sems[0])),
        Rule('$Expr', '$Var', lambda sems: (sems[0])),
        Rule('$Var', 'x', varname(0)),
        Rule('$Var', 'y', varname(1)),
        Rule('$Var', 'z', varname(2)),
        Rule('$Number', 'number'),
        Rule('$Number', 'no .'),
        Rule('$Number', 'integer'),
        Rule('$Number', 'one'),  # 'the smaller one'
        # Primary articles refer to the first variable (v0).
        Rule('$PrimaryArticle', 'a'),
        Rule('$PrimaryArticle', 'an'),
        Rule('$PrimaryArticle', 'one'),
        Rule('$PrimaryArticle', 'the'),
        Rule('$PrimaryArticle', 'the smallest'),
        Rule('$PrimaryArticle', 'the smaller'),
        Rule('$PrimaryArticle', 'the least'),
        Rule('$PrimaryArticle', 'the same'),
        Rule('$PrimaryArticle', 'that'),
        Rule('$PrimaryArticle', 'the first'),
        Rule('$Var', '$PrimaryArticle ?$NumberDescriptor ?$Number',
             varname(0)),
        # Rule('$Var', '$PrimaryArticle ?$NumberDescriptor ?$Number', varname(1)),
        Rule('$NumberDescriptor', 'positive'),
        Rule('$NumberDescriptor', 'constant'),
        Rule('$NumberDescriptor', 'negative'),
        Rule('$NumberDescriptor', 'whole'),
        Rule('$NumberDescriptor', 'natural'),
        # Secondary articles refer to the second variable (v1).
        Rule('$Var', '$SecondaryArticle ?$NumberDescriptor ?$Number',
             varname(1)),
        # Rule('$Var', '$SecondaryArticle ?$NumberDescriptor ?$Number', varname(0)),
        Rule('$SecondaryArticle', 'another'),
        Rule('$SecondaryArticle', 'the other'),
        Rule('$SecondaryArticle', 'the larger'),
        Rule('$SecondaryArticle', 'the second'),
        Rule('$SecondaryArticle', 'a larger'),
        Rule('$SecondaryArticle', 'a second'),
        # Tertiary articles refer to the third variable (v2).
        Rule('$Var', '$TertiaryArticle ?$NumberDescriptor ?$Number',
             varname(2)),
        Rule('$TertiaryArticle', 'the largest'),
        Rule('$TertiaryArticle', 'the greatest'),
        Rule('$TertiaryArticle', 'the third'),
        Rule('$TertiaryArticle', 'a largest'),
        Rule('$TertiaryArticle', 'a third'),
        # Selectors index into the expression list (0 = first, -1 = last).
        Rule('$Expr', '$Selector $ExprList', lambda sems: sems[1][sems[0]]),
        Rule('$Selector', 'the smallest of', 0),
        Rule('$Selector', 'the largest of', -1),
    ])

    # Add in a class called '$Junk' for words that don't matter
    # Vocab.txt contains all the vocab used in grammar
    with open('vocab.txt') as f:
        for line in f:
            rules.append(Rule('$Junk', line.strip()))

    return rules
Exemplo n.º 4
0
class ArithmeticDomain(Domain):
    """Domain for small arithmetic expressions written in words.

    Semantics are nested tuples ``(operator, operand, ...)`` over the
    operator symbols ``'~'`` (unary minus), ``'+'``, ``'-'`` and ``'*'``;
    ``execute`` evaluates such a tuple to a number.
    """

    numeral_rules = [
        Rule('$E', 'one', 1),
        Rule('$E', 'two', 2),
        Rule('$E', 'three', 3),
        Rule('$E', 'four', 4),
    ]

    operator_rules = [
        Rule('$UnOp', 'minus', '~'),
        Rule('$BinOp', 'plus', '+'),
        Rule('$BinOp', 'minus', '-'),
        Rule('$BinOp', 'times', '*'),
    ]

    # $EBO holds "(operator, left operand)" until the right operand arrives.
    compositional_rules = [
        Rule('$E', '$UnOp $E', lambda sems: (sems[0], sems[1])),
        Rule('$EBO', '$E $BinOp', lambda sems: (sems[1], sems[0])),
        Rule('$E', '$EBO $E', lambda sems: (sems[0][0], sems[0][1], sems[1])),
    ]

    # Operator symbol -> implementation used by execute().
    ops = {
        '~': lambda value: -value,
        '+': lambda left, right: left + right,
        '-': lambda left, right: left - right,
        '*': lambda left, right: left * right,
    }

    def train_examples(self):
        """Return the training examples as a new list of Example objects."""
        table = [
            ("one plus one", ('+', 1, 1), 2),
            ("one plus two", ('+', 1, 2), 3),
            ("one plus three", ('+', 1, 3), 4),
            ("two plus two", ('+', 2, 2), 4),
            ("two plus three", ('+', 2, 3), 5),
            ("three plus one", ('+', 3, 1), 4),
            ("three plus minus two", ('+', 3, ('~', 2)), 1),
            ("two plus two", ('+', 2, 2), 4),
            ("three minus two", ('-', 3, 2), 1),
            ("minus three minus two", ('-', ('~', 3), 2), -5),
            ("two times two", ('*', 2, 2), 4),
            ("two times three", ('*', 2, 3), 6),
            ("three plus three minus two", ('-', ('+', 3, 3), 2), 4),
        ]
        return [
            Example(input=text, semantics=sem, denotation=value)
            for text, sem, value in table
        ]

    def test_examples(self):
        """Return the test examples as a new list of Example objects."""
        table = [
            ("minus three", ('~', 3), -3),
            ("three plus two", ('+', 3, 2), 5),
            ("two times two plus three", ('+', ('*', 2, 2), 3), 7),
            ("minus four", ('~', 4), -4),
        ]
        return [
            Example(input=text, semantics=sem, denotation=value)
            for text, sem, value in table
        ]

    def dev_examples(self):
        """Return the module-level ``arithmetic_dev_examples``."""
        return arithmetic_dev_examples

    def rules(self):
        """Return numeral, operator and compositional rules, in that order."""
        lexical = self.numeral_rules + self.operator_rules
        return lexical + self.compositional_rules

    def operator_precedence_features(self, parse):
        """
        Walks the expression tree stored in ``parse.semantics`` and counts,
        for every tuple-valued operand whose operator differs from its
        parent's, the feature ``(operand operator, parent operator)`` -- i.e.
        the first operator sits lower in the tree (binds tighter) than the
        second.  Returns a ``defaultdict(float)`` of those counts.
        """
        counts = defaultdict(float)

        def visit(node):
            # Leaves (non-tuples) contribute nothing.
            if not isinstance(node, tuple):
                return
            for operand in node[1:]:
                # Descend first so deeper pairs are recorded before this one.
                visit(operand)
                if isinstance(operand, tuple) and operand[0] != node[0]:
                    counts[(operand[0], node[0])] += 1.0

        visit(parse.semantics)
        return counts

    def features(self, parse):
        """Return rule features updated with operator-precedence features."""
        combined = rule_features(parse)
        precedence = self.operator_precedence_features(parse)
        combined.update(precedence)
        return combined

    def weights(self):
        """Return fixed weights for the operator-precedence features.

        Pairs with '*' or '~' below '+' or '-' get 1.0; the inverted pairs
        get -1.0.  Everything else defaults to 0.0.
        """
        table = defaultdict(float)
        tight = ('*', '~')
        loose = ('+', '-')
        for inner in tight:
            for outer in loose:
                table[(inner, outer)] = 1.0
        for outer in tight:
            for inner in loose:
                table[(inner, outer)] = -1.0
        return table

    def grammar(self):
        """Build a Grammar over ``rules()`` with start symbol ``$E``."""
        all_rules = self.rules()
        return Grammar(rules=all_rules, start_symbol='$E')

    def execute(self, semantics):
        """Evaluate a semantics tuple recursively; non-tuples are returned."""
        if not isinstance(semantics, tuple):
            return semantics
        apply_op = self.ops[semantics[0]]
        operands = [self.execute(operand) for operand in semantics[1:]]
        return apply_op(*operands)

    def training_metric(self):
        """Return the metric used for training (denotation accuracy)."""
        return DenotationAccuracyMetric()
Exemplo n.º 5
0
# This means that you can't treat `point` like the other binary operators in your syntactic grammar.
# This will require you to add special rules to handle the internal structure of these decimal numbers.

# In[ ]:

from arithmetic import ArithmeticDomain
from parsing import Rule, add_rule

# Start again from a fresh ArithmeticDomain grammar; remove these two lines
# if you want your question 1 extension to combine with these extensions:
math_domain = ArithmeticDomain()
math_grammar = math_domain.grammar()

# Remember to add these rules to the grammar!
# The number words 'one'..'four' with values 1..4, under category $I.
integer_rules = [
    Rule('$I', word, value)
    for value, word in enumerate(('one', 'two', 'three', 'four'), start=1)
]

# The same number words and values, under category $T.
tens_rules = [
    Rule('$T', word, value)
    for value, word in enumerate(('one', 'two', 'three', 'four'), start=1)
]

# Add the above rules to math_grammar:

# Add rules to the grammar for using the above:
Exemplo n.º 6
0
from parsing import Grammar, Rule

# Grammar rules for a small question grammar over Chinese input tokens.
# Each Rule is (left-hand category, right-hand side, semantics); a '?' prefix
# on a right-hand-side item is used where an item may be absent.
rules = [
    # Root: one $Type optionally followed by another; semantics is the list
    # of child semantics itself.
    Rule('$ROOT', '$Type ?$Type', lambda sems: sems),
    # A $Type is a $Person or a $Song and passes the child semantics through.
    Rule('$Type', '$Person', lambda sems: sems[0]),
    Rule('$Type', '$Song', lambda sems: sems[0]),
    # Entity lexicon: the semantics is the surface string itself.
    Rule('$Person', '谢霆锋', '谢霆锋'),
    Rule('$Person', '谢贤', '谢贤'),
    Rule('$Song', '歌唱祖国', '歌唱祖国'),
    # NOTE(review): '$Loction' looks like a misspelling of '$Location', but it
    # is used consistently within this list, so the rules still connect.
    Rule('$Loction', '香港', '香港'),
    # This token maps to the placeholder 'who'.
    Rule('$Person', '人', 'who'),
    #Rule('$Person','$Loction $Person', lambda sems: (sems[1],"born("+sems[1]+") = " + sems[0])),
    # A location followed by a person yields the pair (location, person).
    Rule('$Person', '$Loction $Person', lambda sems: (sems[0], sems[1])),
    # $Which carries no meaning of its own: the person semantics is kept.
    Rule('$Which', '哪个', '哪个'),
    Rule('$Person', '$Which $Person', lambda sems: sems[1]),
    # A relation becomes a one-argument function building (relation, arg).
    Rule('$Relation', '$FwdRelation', lambda sems: (lambda arg:
                                                    (sems[0], arg))),
    Rule('$FwdRelation', '父亲', '父亲'),
    Rule('$FwdRelation', '儿子', '儿子'),
    Rule('$FwdRelation', '老公', '老公'),
    Rule('$FwdRelation', '歌曲', '歌曲'),
    # Two-token phrase mapped to the same semantics as the rule above.
    Rule('$FwdRelation', '唱 的', '歌曲'),
    Rule('$De', '的', '的'),
    # Another token mapped to the placeholder 'who'.
    Rule('$Person', '谁', 'who'),
    # Equality: "<type> <equal> <type>" yields ('Equal', left, right).
    Rule('$Equal', '是', 'Equal'),
    Rule('$Type', '$Type $Equal $Type', lambda sems:
         (sems[1], sems[0], sems[2])),
    #Rule('$Person','$Person $Relation', lambda sems: sems[1](sems[0]) )
    # Apply the relation function (third child) to the semantics of the
    # first child; the optional $De in between is ignored.
    Rule('$Type', '$Type ?$De $Relation', lambda sems: sems[2](sems[0]))
]
# No start symbol is passed here, so Grammar's own default applies.
grammar = Grammar(rules=rules)
Exemplo n.º 7
0
class TravelDomain(Domain):
    """Domain for travel queries.

    The grammar is assembled from the ``rules_*`` class attributes below and
    uses a ``TokenAnnotator`` together with a ``GeoNamesAnnotator`` created
    per instance.  Lexicons are written as (category, phrases) tables and
    expanded into one ``Rule`` per phrase, category by category.
    """

    def __init__(self):
        self.geonames_annotator = GeoNamesAnnotator()

    def train_examples(self):
        """Return the module-level ``travel_train_examples``."""
        return travel_train_examples

    def dev_examples(self):
        """Return the module-level ``travel_dev_examples``."""
        return travel_dev_examples

    def test_examples(self):
        """Return the module-level ``travel_test_examples``."""
        return travel_test_examples

    # Define the basic structure of a $TravelQuery.
    # A $TravelQuery is a sequence of one or more $TravelQueryElements.
    # A $TravelQueryElement is either a $TravelLocation or a $TravelArgument.
    # EXERCISE: This approach permits any number of $FromLocations and $ToLocations.
    # Find a way to require that (a) there is at least one location,
    # (b) there are not multiple $FromLocations or $ToLocations.
    rules_travel = [
        Rule('$ROOT', '$TravelQuery', sems_0),
        Rule(
            '$TravelQuery',
            '$TravelQueryElements',
            lambda sems: merge_dicts({'domain': 'travel'}, sems[0]),
        ),
        Rule(
            '$TravelQueryElements',
            '$TravelQueryElement ?$TravelQueryElements',
            lambda sems: merge_dicts(sems[0], sems[1]),
        ),
        Rule('$TravelQueryElement', '$TravelLocation', sems_0),
        Rule('$TravelQueryElement', '$TravelArgument', sems_0),
    ]

    # Define query elements which specify the origin or destination.
    rules_travel_locations = [
        Rule('$TravelLocation', '$ToLocation', sems_0),
        Rule('$TravelLocation', '$FromLocation', sems_0),
        Rule(
            '$ToLocation',
            '$To $Location',
            lambda sems: {'destination': sems[1]},
        ),
        Rule(
            '$FromLocation',
            '$From $Location',
            lambda sems: {'origin': sems[1]},
        ),
        Rule('$To', 'to'),
        Rule('$From', 'from'),
    ]

    # Allow travel arguments which specify the mode of travel.
    # Raises oracle accuracy to ~20%.
    # All lexical items are either obvious or attested in training data.
    rules_travel_modes = (
        [Rule('$TravelArgument', '$TravelMode', sems_0)]
        # Each mode category maps to a {'mode': ...} semantics.
        + [
            Rule('$TravelMode', category, {'mode': mode})
            for category, mode in (
                ('$AirMode', 'air'),
                ('$BikeMode', 'bike'),
                ('$BoatMode', 'boat'),
                ('$BusMode', 'bus'),
                ('$CarMode', 'car'),
                ('$TaxiMode', 'taxi'),
                ('$TrainMode', 'train'),
                ('$TransitMode', 'transit'),
            )
        ]
        # Trigger phrases for each mode category.
        + [
            Rule(category, phrase)
            for category, phrases in (
                ('$AirMode', (
                    'air fare',
                    'air fares',
                    'airbus',
                    'airfare',
                    'airfares',
                    'airline',
                    'airlines',
                    '?by air',
                    'flight',
                    'flights',
                    'fly',
                )),
                ('$BikeMode', (
                    '?by bike',
                    'bike riding',
                )),
                ('$BoatMode', (
                    '?by boat',
                    'cruise',
                    'cruises',
                    'norwegian cruise lines',
                )),
                ('$BusMode', (
                    '?by bus',
                    'bus tours',
                    'buses',
                    'shutle',
                    'shuttle',
                )),
                ('$CarMode', (
                    '?by car',
                    'drive',
                    'driving',
                    'gas',
                )),
                ('$TaxiMode', (
                    'cab',
                    'car service',
                    'taxi',
                )),
                ('$TrainMode', (
                    '?by train',
                    'trains',
                    'amtrak',
                )),
                ('$TransitMode', (
                    '?by public transportation',
                    '?by ?public transit',
                )),
            )
            for phrase in phrases
        ]
    )

    # Allow arguments which indicate travel without specifying a mode.
    # Adds roughly 4% in oracle accuracy.
    # All of the lexical rules are obvious or are based on inspection of
    # training data -- not inspection of test data!
    rules_travel_triggers = (
        [Rule('$TravelArgument', '$TravelTrigger', {})]
        + [
            Rule('$TravelTrigger', phrase)
            for phrase in (
                'tickets',
                'transportation',
                'travel',
                'travel packages',
                'trip',
            )
        ]
    )

    # Allow travel arguments which specify the type of information requested.
    rules_request_types = (
        [Rule('$TravelArgument', '$RequestType', sems_0)]
        + [
            Rule('$RequestType', category, {'type': request_type})
            for category, request_type in (
                ('$DirectionsRequest', 'directions'),
                ('$DistanceRequest', 'distance'),
                ('$ScheduleRequest', 'schedule'),
                ('$CostRequest', 'cost'),
            )
        ]
        + [
            Rule(category, phrase)
            for category, phrases in (
                ('$DirectionsRequest', ('directions', 'how do i get')),
                ('$DistanceRequest', ('distance',)),
                ('$ScheduleRequest', ('schedule',)),
                ('$CostRequest', ('cost',)),
            )
            for phrase in phrases
        ]
    )

    # Allow optional words around travel query elements.
    rules_optionals = (
        [
            # EXERCISE: These rules introduce some spurious ambiguity.  Figure
            # out why, and propose a way to avoid or minimize the spurious
            # ambiguity.
            Rule('$TravelQueryElement', '$TravelQueryElement $Optionals', sems_0),
            Rule('$TravelQueryElement', '$Optionals $TravelQueryElement', sems_1),

            Rule('$Optionals', '$Optional ?$Optionals'),
        ]
        + [
            Rule('$Optional', category)
            for category in (
                '$Show',
                '$Modifier',
                '$Carrier',
                '$Stopword',
                '$Determiner',
            )
        ]
        + [
            Rule(category, phrase)
            for category, phrases in (
                ('$Show', (
                    'book',
                    'give ?me',
                    'show ?me',
                )),
                ('$Modifier', (
                    'cheap',
                    'cheapest',
                    'discount',
                    'honeymoon',
                    'one way',
                    'direct',
                    'scenic',
                    'transatlantic',
                    'one day',
                    'last minute',
                )),
                ('$Carrier', (
                    'delta',
                    'jet blue',
                    'spirit airlines',
                    'amtrak',
                )),
                ('$Stopword', (
                    'all',
                    'of',
                    'what',
                    'will',
                    'it',
                    'to',
                )),
                ('$Determiner', (
                    'a',
                    'an',
                    'the',
                )),
            )
            for phrase in phrases
        ]
    )

    # Allow any query to be parsed as a non-travel query.
    rules_not_travel = [
        Rule('$ROOT', '$NotTravelQuery', sems_0),
        Rule('$NotTravelQuery', '$Text', {'domain': 'other'}),
        Rule('$Text', '$Token ?$Text'),
    ]

    def rules(self):
        """Return all rule groups concatenated into one new list."""
        groups = (                                # semantics oracle accuracy
            self.rules_travel,                    # 0% train, 0% test
            self.rules_travel_locations,          # 0% train, 0% test
            self.rules_travel_modes,              # 13% train, 4% test
            self.rules_travel_triggers,           # 17% train, 12% test
            self.rules_request_types,             # 20% train, 16% test
            self.rules_optionals,                 # 40% train, 20% test
            self.rules_not_travel,                # 57% train, 48% test
        )
        combined = []
        for group in groups:
            combined = combined + group
        return combined

    def annotators(self):
        """Return a token annotator plus this instance's GeoNames annotator."""
        return [TokenAnnotator(), self.geonames_annotator]

    def grammar(self):
        """Build a Grammar from ``rules()`` and ``annotators()``."""
        all_rules = self.rules()
        return Grammar(rules=all_rules, annotators=self.annotators())

    def features(self, parse):
        """Return the rule features of ``parse``."""
        return rule_features(parse)

    def metrics(self):
        """Return the semantics-match metrics plus HasTravelParseMetric."""
        extra = [HasTravelParseMetric()]
        return semantics_match_metrics() + extra
Exemplo n.º 8
0
 def rules(self):
     """Return three rules: a $Location with optional tokens on either side.

     The root rule takes its semantics from ``sems_1``; the other two rules
     let ``$Optionals`` cover any run of ``$Token`` items.
     """
     root_rule = Rule('$ROOT', '?$Optionals $Location ?$Optionals', sems_1)
     filler_rules = [
         Rule('$Optionals', '$Optional ?$Optionals'),
         Rule('$Optional', '$Token'),
     ]
     return [root_rule] + filler_rules
Exemplo n.º 9
0
from parsing import parse_input, Grammar, Rule, Parse
from annotators import *
from operator import itemgetter


def merge_dicts(*dicts):
    """Return a new dict combining all arguments, left to right.

    Falsy arguments (``None``, empty dicts) are skipped.  When a key occurs
    more than once, the value from the later argument wins.  The inputs are
    not modified.
    """
    merged = {}
    # filter(None, ...) keeps only the truthy arguments.
    for mapping in filter(None, dicts):
        merged.update(mapping)
    return merged


# Rules for "declare"-style requests: a $Declare keyword followed by one to
# four $DeclarationElement items.  The semantics of the elements are merged
# into a single dict that also carries {'request': 'declare'}.
decl_rules = [
    # Root: the keyword's own semantics (sems[0]) is not used.
    Rule('$ROOT', '$Declare $Declaration',
         lambda sems: merge_dicts({'request': 'declare'}, sems[1])),
    # Keywords that introduce a declaration.
    Rule('$Declare', 'declare', itemgetter(0)),
    Rule('$Declare', 'create', itemgetter(0)),
    Rule('$Declare', 'define', itemgetter(0)),
    # A declaration is 1, 2, 3 or 4 elements; each length is spelled out as
    # its own rule.
    Rule('$Declaration', '$DeclarationElement', itemgetter(0)),
    Rule('$Declaration', '$DeclarationElement $DeclarationElement',
         lambda sems: merge_dicts(sems[0], sems[1])),
    Rule('$Declaration',
         '$DeclarationElement $DeclarationElement $DeclarationElement',
         lambda sems: merge_dicts(sems[0], sems[1], sems[2])),
    # NOTE(review): the backslash continuation below keeps the next line's
    # leading spaces inside the string literal; presumably Rule splits the
    # right-hand side on whitespace, so the extra spaces are harmless --
    # confirm against Rule.
    Rule(
        '$Declaration',
        '$DeclarationElement $DeclarationElement $DeclarationElement\
          $DeclarationElement',
        lambda sems: merge_dicts(sems[0], sems[1], sems[2], sems[3])),
]