class EagerArithmeticDomain(Domain):
    """Arithmetic domain whose rule semantics compute values eagerly.

    The operator rules carry Python callables instead of symbolic operator
    names, so the semantics assembled by the compositional rules is the
    value itself and `execute` simply returns it.  Examples are taken from
    `ArithmeticDomain` and passed through `convert_example`.
    """

    def train_examples(self):
        """Return ArithmeticDomain's training examples, each converted."""
        return list(map(convert_example, ArithmeticDomain().train_examples()))

    def test_examples(self):
        """Return ArithmeticDomain's test examples, each converted."""
        return list(map(convert_example, ArithmeticDomain().test_examples()))

    def dev_examples(self):
        """Return ArithmeticDomain's dev examples, each converted."""
        return list(map(convert_example, ArithmeticDomain().dev_examples()))

    # Numerals are shared with the symbolic arithmetic domain.
    numeral_rules = ArithmeticDomain.numeral_rules

    # Binary operators are curried: the semantics takes the left operand and
    # returns a function awaiting the right operand.
    operator_rules = [
        Rule('$BinOp', 'plus', lambda left: (lambda right: left + right)),
        Rule('$BinOp', 'minus', lambda left: (lambda right: left - right)),
        Rule('$BinOp', 'times', lambda left: (lambda right: left * right)),
        Rule('$UnOp', 'minus', lambda operand: -1 * operand),
    ]

    compositional_rules = [
        # $EBO already holds "left operand applied to the operator".
        Rule('$E', '$EBO $E', lambda parts: parts[0](parts[1])),
        Rule('$EBO', '$E $BinOp', lambda parts: parts[1](parts[0])),
        Rule('$E', '$UnOp $E', lambda parts: parts[0](parts[1])),
    ]

    def rules(self):
        """Return numeral, operator and compositional rules as one new list."""
        combined = list(self.numeral_rules)
        combined += self.operator_rules
        combined += self.compositional_rules
        return combined

    def grammar(self):
        """Build a Grammar over `rules()` with '$E' as the start symbol."""
        all_rules = self.rules()
        return Grammar(rules=all_rules, start_symbol='$E')

    def execute(self, semantics):
        """Return `semantics` unchanged; it is returned as the denotation."""
        return semantics

    def training_metric(self):
        """Return a fresh DenotationAccuracyMetric."""
        return DenotationAccuracyMetric()
def cartesian_product_of_lexical_rules(rules, restrict_by_lhs=True):
    """
    Return an expanded rule list in which lexical rules are cross-combined.

    Non-lexical rules are carried over untouched.  Lexical rules are grouped
    (by LHS when restrict_by_lhs is true, otherwise into a single group), and
    for every group a new rule is emitted for each pairing of a rule's
    (lhs, rhs) with one of the distinct semantics found in that group.
    Restricting by LHS helps to avoid constructing malformed semantics.
    """
    from itertools import product
    from parsing import Rule, is_lexical

    lexical = [candidate for candidate in rules if is_lexical(candidate)]
    expanded = [candidate for candidate in rules if not is_lexical(candidate)]

    # Group the lexical rules; 'dummy' is the shared key when the LHS is
    # not used to partition.
    groups = defaultdict(list)
    for lexical_rule in lexical:
        key = lexical_rule.lhs if restrict_by_lhs else 'dummy'
        groups[key].append(lexical_rule)

    # Within each group pair every rule with every distinct semantics.
    for group in list(groups.values()):
        distinct_sems = set([member.sem for member in group])
        expanded.extend(
            Rule(member.lhs, member.rhs, sem)
            for member, sem in product(group, distinct_sems))
    return expanded
def load_rules():
    """Build and return the rule list for the math-word-problem grammar.

    The list contains, in order: numeral rules generated from NUMBERS,
    top-level problem/constraint structure, operator phrases (prefix, infix
    and reversed infix), expression lists, comparisons, variable references,
    and finally one '$Junk' rule per non-blank line of 'vocab.txt' (read
    from the current working directory).

    Fixes relative to the previous version:
      * 'minus' now maps to '-' (it was mapped to '+').
      * 'multiplied by' / 'when multiplied by' now map to '*' (were '+').
      * the '$Fraction' rule for 'third' is no longer registered twice.
      * blank lines in vocab.txt no longer create empty-RHS '$Junk' rules.

    Returns:
        list of Rule objects.

    Raises:
        OSError: if 'vocab.txt' cannot be opened.
    """
    rules = []

    def push_list(head, tail):
        # NOTE(review): this yields a two-element list [head, tail]; when
        # tail is itself the result of push_list the structure nests rather
        # than flattens.  Left as-is because the consumer is not visible
        # here -- confirm the intended shape.
        return [head] + [tail]

    def varname(i):
        return "v%s" % i

    def to_int(sem):
        # Follow the first element of nested tuples down to a leaf and
        # coerce it to int; anything that cannot be coerced counts as 1.
        if isinstance(sem, tuple):
            return to_int(sem[0])
        else:
            try:
                return int(sem)
            except (ValueError, TypeError):
                return 1

    # Numerals: digit strings, number words, and their negated forms.
    for i, w in enumerate(NUMBERS):
        rules.append(Rule('$Num', str(i), i))
        rules.append(Rule('$Num', "- %s" % i, -i))
        rules.append(Rule('$Num', w, i))
        rules.append(Rule('$Num', "negative %s" % w, -i))
        if '-' in w:
            rules.append(Rule('$Num', w.replace('-', ' '), i))
        if ' and ' in w:
            rules.append(Rule('$Num', w.replace(' and ', ' '), i))

    rules.extend([
        # Odd type of problem: 'four plus four' -> x = 4 + 4
        Rule('$E', '$Expr', lambda sems: ('=', sems[0], varname(0))),

        # Usual types of problem structure
        Rule('$E', '?$Command $ConstraintList ?$Command',
             lambda sems: sems[1]),
        Rule('$ConstraintList', '$Constraint ?$EOL', lambda sems: sems[0]),
        Rule('$ConstraintList', '$Constraint ?$EOL ?$Joiner $ConstraintList',
             lambda sems: push_list(sems[0], sems[3])),
        Rule('$Joiner', 'and'),

        # Generic constraint
        Rule('$Constraint', '$EBO $Expr',
             lambda sems: (sems[0][0], sems[0][1], sems[1])),
        Rule('$EBO', '$Expr $Compare', lambda sems: (sems[1], sems[0])),
        Rule('$EOL', '.'),
        Rule('$EOL', ','),
        Rule('$EOL', '?'),
        Rule('$Comma', ','),

        # Constraints with leading or trailing Junk
        Rule('$JunkList', '$Junk ?$JunkList'),
        Rule('$Constraint', '$Find $Constraint', lambda sems: sems[1]),
        Rule('$Constraint', '$Find $JunkList $If $Constraint',
             lambda sems: sems[3]),
        Rule('$Constraint', '$If $Constraint ?$EOL $Find $JunkList',
             lambda sems: sems[1]),
        Rule('$If', 'if'),
        Rule('$If', 'such that'),
        Rule('$Find', 'find'),
        Rule('$Find', 'what'),

        # Pre or postfix command sentence.
        # TODO: extract a semantic meaning like ('find smallest') or ('find all')
        Rule('$Command', '$Find $JunkList ?$EOL'),
        Rule('$Command', '$What $WordIs $JunkList ?$EOL'),
        Rule('$Command', '$I $Have $JunkList ?$EOL'),
        Rule('$Command', '$Given $JunkList ?$EOL'),
        Rule('$What', 'what'),
        Rule('$WordIs', 'is'),
        Rule('$WordIs', 'are'),
        Rule('$Have', 'have'),
        Rule('$I', 'i'),
        Rule('$Given', 'given'),
    ])

    # Complex constraint: 'When x is added to y the result is z'
    rules.extend([
        Rule('$Constraint',
             '$Occasion $Expr $OccasionOpRtoL $Expr ?$EOL $ResultsIn $Expr',
             lambda sems: ('=', (sems[2], sems[1], sems[3]), sems[6])),
        Rule('$Occasion', 'when'),
        Rule('$Occasion', 'if'),
        Rule('$OccasionOpRtoL', 'is added to', '+'),
        Rule('$OccasionOpRtoL', 'is multiplied by', '*'),
        Rule('$OccasionOpRtoL', 'is divided by', '/'),
        # 'x is subtracted from y' means y - x, hence the swapped operands.
        Rule('$Constraint',
             '$Occasion $Expr $OccasionOpLtoR $Expr ?$EOL $ResultsIn $Expr',
             lambda sems: ('=', (sems[2], sems[3], sems[1]), sems[6])),
        Rule('$OccasionOpLtoR', 'is subtracted from', '-'),
        Rule('$ResultsIn', 'the result is'),
    ])

    # Non-standard constraint OperateAndEquality
    rules.extend([
        Rule('$Constraint', '?$Question $ExprList $OperatorAndEquality $Expr',
             lambda sems: ('=', (sems[2], sems[1]), sems[3])),
        Rule('$OperatorAndEquality', 'total to', '+'),
        Rule('$OperatorAndEquality', 'sum to', '+'),
        Rule('$OperatorAndEquality', 'total', '+'),
        Rule('$OperatorAndEquality', 'sum', '+'),
        Rule('$OperatorAndEquality', 'add up to', '+'),
        Rule('$OperatorAndEquality', 'have a sum of', '+'),
        Rule('$OperatorAndEquality', 'have a total of', '+'),
        Rule('$OperatorAndEquality', 'have a difference of', '-'),
        Rule('$OperatorAndEquality', 'have the sum of', '+'),
        Rule('$OperatorAndEquality', 'have the total of', '+'),
        Rule('$OperatorAndEquality', 'have the difference of', '-'),
        Rule('$OperatorAndEquality', 'differ by', '-'),
        Rule('$Question', 'which'),
        Rule('$Question', 'what'),
    ])

    # PreOperator
    rules.append(
        Rule('$Expr', '$PreOperator $ExprList',
             lambda sems: (sems[0], sems[1])))
    rules.append(
        Rule('$Expr', '$PreUnaryOperator $Expr',
             lambda sems: (sems[0], sems[1])))
    for prefix in ['', 'the ']:
        rules.extend([
            Rule('$PreOperator', prefix + 'sum of', '+'),
            Rule('$PreOperator', prefix + 'product of', '*'),
            Rule('$PreOperator', prefix + 'quotient of', '/'),
            Rule('$PreUnaryOperator', prefix + 'square root of', '^(1/2)'),
            Rule('$PreUnaryOperator', prefix + 'square of', '^2'),
            Rule('$PreUnaryOperator', prefix + 'cube of', '^3'),
        ])

    # Reversed prefix operators: the operand list is reversed before use.
    rules.append(
        Rule('$Expr', '$RevPreOperator $ExprList',
             lambda sems: (sems[0], tuple(reversed(sems[1])))))
    for prefix in ['', 'the ']:
        rules.extend([
            Rule('$RevPreOperator', prefix + 'difference of', '-'),
            Rule('$RevPreOperator', prefix + 'difference between', '-'),
        ])

    rules.append(
        Rule('$Expr', '$Multiplier $Expr',
             lambda sems: ('*', (sems[0], sems[1]))))
    rules.extend([
        Rule('$Multiplier', 'twice', 2),
        Rule('$Multiplier', 'triple', 3),
        Rule('$Multiplier', 'quadruple', 4),
        Rule('$Multiplier', 'half', 1. / 2),
        # two times the first plus 'a fourth the second'
        Rule('$Multiplier', '?$A $Fraction ?$Of', lambda sems: sems[1]),
        # two times the first plus ' fourth of the second'
        Rule('$Multiplier', '$Expr $Of', lambda sems: sems[0]),
        # two times the first plus '3/4 of the second'
        Rule('$Multiplier', '$Num $Div $Num',
             lambda sems: 1. * sems[0] / sems[2]),
        Rule('$Of', 'of'),
        Rule('$A', 'one'),
        Rule('$A', 'a'),
        Rule('$Div', '/')
    ])
    for prefix in ['', 'one-']:
        rules.extend([
            Rule('$Fraction', prefix + 'fifth', 1. / 5),
            Rule('$Fraction', prefix + 'fourth', 1. / 4),
            Rule('$Fraction', prefix + 'third', 1. / 3),
            Rule('$Fraction', prefix + 'half', 1. / 2),
        ])

    # NOTE: not referenced by any rule below; kept for future use.
    def consecutive_integers(n, is_even, mult=None):
        # n -> number of Integers
        # is_even -> (True, False, None) == (even, odd, consec)
        try:
            count = int(n)
        except (ValueError, TypeError):
            try:
                count = NUMBERS.index(n)
            except Exception:
                count = 2  # TODO: not this number
        start = -1 if is_even == False else 0
        if mult is None:
            mult = 2 if is_even in (True, False) else 1
        return tuple('%s*k+%s' % (mult, mult * i + start)
                     for i in range(count))

    rules.extend([
        # ExprList
        Rule('$ExprList', '$Expr $And $Expr',
             lambda sems: (sems[0], sems[2])),
        Rule('$And', 'and'),
        # The two- and three-variable readings of the same RHS are both
        # registered, so such phrases parse ambiguously.
        Rule('$ExprList', '$The ?$SetDescriptor ?$Integers',
             tuple(varname(i) for i in [0, 1])),
        Rule('$ExprList', '?$The ?$SetDescriptor $Two ?$Integers',
             tuple(varname(i) for i in [0, 1])),
        Rule('$ExprList', '$The ?$SetDescriptor ?$Integers',
             tuple(varname(i) for i in [0, 1, 2])),
        Rule('$ExprList', '?$The ?$SetDescriptor $Three ?$Integers',
             tuple(varname(i) for i in [0, 1, 2])),
        Rule('$The', 'the'),
        Rule('$Two', '2'),
        Rule('$Two', 'two'),
        Rule('$Three', '3'),
        Rule('$Three', 'three'),
        Rule('$SetDescriptor', 'same'),
        Rule('$SetDescriptor', 'all'),
        Rule('$ExprList', '?$The $EndDescriptor $Two $Integers',
             lambda sems: tuple(varname(i * sems[1]) for i in [0, 1])),
        Rule('$EndDescriptor', 'larger', -1),
        Rule('$EndDescriptor', 'largest', -1),
        Rule('$EndDescriptor', 'smaller', 0),
        Rule('$EndDescriptor', 'smallest', 0),

        # # Is this crazy?! Probably!
        # Rule('$ExprList', 'its digits',
        #     (('%', ('/', varname(0), 10), 10), ('%', varname(0), 10))),
        # Rule('$ExprList', 'the digits of a two-digit number',
        #     (('%', ('/', varname(0), 10), 10), ('%', varname(0), 10))),
        # Rule('$ExprList', 'the digits of a 2-digit number',
        #     (('%', ('/', varname(0), 10), 10), ('%', varname(0), 10))),
        # Rule('$ExprList', 'the digits',
        #     (('%', ('/', varname(0), 10), 10), ('%', varname(o), 10))),

        # Mapping operators apply one unary operator to every list item.
        Rule('$ExprList', '$ExprList $PostMappingOperator',
             lambda sems: tuple((sems[1], item) for item in sems[0])),
        Rule('$PostMappingOperator', 'whose squares', '^2'),
        Rule('$ExprList', '$PreMappingOperator $ExprList',
             lambda sems: tuple((sems[0], item) for item in sems[1])),
        Rule('$PreMappingOperator', 'the squares of', '^2'),
        Rule('$PreMappingOperator', 'the roots of', '^(.5)'),
        Rule('$PreMappingOperator', 'the reciprocals of', '^(-1)'),

        Rule(
            '$ExprList',
            '$Expr ?$Sign $Consecutive ?$Sign ?$Even ?$Sign $Integers ?$Parenthetical',
            lambda sems: tuple(
                varname(i) for i in range(to_int(sems[0])))),
        Rule('$Consecutive', 'consecutive'),
        Rule('$Even', 'even', True),
        Rule('$Even', 'odd', False),
        Rule('$Integers', 'integers'),
        Rule('$Integers', 'numbers'),
        Rule('$Sign', 'positive'),
        Rule('$Sign', 'negative'),
        Rule('$ExprList', '$Num $Consecutive $Multiples $Of $Num',
             lambda sems: tuple(varname(i) for i in range(sems[0]))),
        Rule('$Multiples', 'multiples'),
        Rule('$Parenthetical', '$Expr $Comma $Expr ?$Comma $And $Expr'),

        # MidOperator
        Rule('$Expr', '$Expr ?$Comma $MidOperator $Expr ?$Comma',
             lambda sems: (sems[2], sems[0], sems[3])),
    ])
    rules.extend([
        # Word
        Rule('$MidOperator', 'plus', '+'),
        # Fixed: 'minus' used to map to '+'.
        Rule('$MidOperator', 'minus', '-'),
        Rule('$MidOperator', 'times', '*'),
        Rule('$MidOperator', 'time', '*'),
        Rule('$MidOperator', 'modulo', '%'),
    ])
    for prefix in ['', 'when ']:
        rules.extend([
            Rule('$MidOperator', prefix + 'added to', '+'),
            # Fixed: 'multiplied by' used to map to '+'.
            Rule('$MidOperator', prefix + 'multiplied by', '*'),
            Rule('$MidOperator', prefix + 'divided by', '/'),
            Rule('$MidOperator', prefix + 'decreased by', '-'),
        ])
    rules.extend([
        # Literal
        Rule('$MidOperator', '+', '+'),
        Rule('$MidOperator', '-', '-'),
        Rule('$MidOperator', '*', '*'),
        Rule('$MidOperator', '/', '/'),
        Rule('$MidOperator', '%', '%'),
        # Complex structure
        Rule('$MidOperator', 'more than', '+'),
    ])
    rules.extend([
        # 'x less than y' means y - x, hence the swapped operands.
        Rule('$Expr', '$Expr ?$Comma $RevMidOperator $Expr ?$Comma',
             lambda sems: (sems[2], sems[3], sems[0])),
        Rule('$RevMidOperator', 'less than', '-'),
    ])

    rules.extend([
        # Comparisons
        Rule('$Compare', 'is', '='),
        Rule('$Compare', 'equals', '='),
        Rule('$Compare', '=', '='),
        Rule('$Compare', 'is equal to', '='),
        Rule('$Compare', 'is less than', '<'),
        Rule('$Compare', 'is less than or equal to', '<='),
        Rule('$Compare', 'is greater than', '>'),
        Rule('$Compare', 'is greater than or equal to', '>='),

        # SplitComparison
        # Type a. X exceeds Y by Z
        Rule('$Constraint', '$Expr $SplitComparison $Expr $By $Expr',
             lambda sems: ('=', (sems[0], (sems[1], sems[2], sems[4])))),
        # Type b: X is Z more than Y
        Rule('$Constraint', '$Expr $Is $Expr $SplitComparison $Expr',
             lambda sems: ('=', (sems[0], (sems[3], sems[4], sems[2])))),
        Rule('$SplitComparison', 'exceeds', '+'),
        Rule('$SplitComparison', 'is greater than', '+'),
        Rule('$SplitComparison', 'is less than', '-'),
        Rule('$SplitComparison', 'more than', '+'),
        Rule('$SplitComparison', 'less than', '-'),
        Rule('$By', 'by'),
        Rule('$Is', 'is'),
    ])

    rules.extend([
        # Properties
        Rule('$Expr', 'its square', ('^2', varname(0))),
        # NOTE(review): '^1/2' differs from the '^(1/2)' spelling used for
        # 'square root of' above -- confirm which form the consumer expects.
        Rule('$Expr', 'its root', ('^1/2', varname(0))),

        # These examples make me uncomfortable a little.
        # Find two consecutive ints which add to 4 and 'whose product is X'
        # Can we fix coref?
        Rule('$Expr', '$Group $GroupOp', lambda sems: (sems[1], sems[0])),
        Rule('$Group', 'their_2', tuple(varname(i) for i in [0, 1])),
        Rule('$Group', 'their_3', tuple(varname(i) for i in [0, 1, 2])),
        Rule('$Group', 'whose_2', tuple(varname(i) for i in [0, 1])),
        Rule('$Group', 'whose_3', tuple(varname(i) for i in [0, 1, 2])),
        Rule('$Group', 'the_2', tuple(varname(i) for i in [0, 1])),
        Rule('$Group', 'the_3', tuple(varname(i) for i in [0, 1, 2])),
        Rule('$GroupOp', 'sum', '+'),
        Rule('$GroupOp', 'sums', '+'),
        Rule('$GroupOp', 'difference', '-'),
        Rule('$GroupOp', 'differences', '-'),
        Rule('$GroupOp', 'product', '*'),
        Rule('$GroupOp', 'products', '*'),

        # This one feels safe: 'two consecutive ints whose sum is 7'
        Rule('$Expr', '$ExprList $Group $GroupOp',
             lambda sems: (sems[2], sems[0])),
    ])

    rules.extend([
        # Numbers and Variables
        Rule('$Expr', '$Num', lambda sems: (sems[0])),
        Rule('$Expr', '$Var', lambda sems: (sems[0])),
        Rule('$Var', 'x', varname(0)),
        Rule('$Var', 'y', varname(1)),
        Rule('$Var', 'z', varname(2)),
        Rule('$Number', 'number'),
        Rule('$Number', 'no .'),
        Rule('$Number', 'integer'),
        Rule('$Number', 'one'),  # 'the smaller one'

        Rule('$PrimaryArticle', 'a'),
        Rule('$PrimaryArticle', 'an'),
        Rule('$PrimaryArticle', 'one'),
        Rule('$PrimaryArticle', 'the'),
        Rule('$PrimaryArticle', 'the smallest'),
        Rule('$PrimaryArticle', 'the smaller'),
        Rule('$PrimaryArticle', 'the least'),
        Rule('$PrimaryArticle', 'the same'),
        Rule('$PrimaryArticle', 'that'),
        Rule('$PrimaryArticle', 'the first'),
        Rule('$Var', '$PrimaryArticle ?$NumberDescriptor ?$Number',
             varname(0)),
        # Rule('$Var', '$PrimaryArticle ?$NumberDescriptor ?$Number', varname(1)),
        Rule('$NumberDescriptor', 'positive'),
        Rule('$NumberDescriptor', 'constant'),
        Rule('$NumberDescriptor', 'negative'),
        Rule('$NumberDescriptor', 'whole'),
        Rule('$NumberDescriptor', 'natural'),

        Rule('$Var', '$SecondaryArticle ?$NumberDescriptor ?$Number',
             varname(1)),
        # Rule('$Var', '$SecondaryArticle ?$NumberDescriptor ?$Number', varname(0)),
        Rule('$SecondaryArticle', 'another'),
        Rule('$SecondaryArticle', 'the other'),
        Rule('$SecondaryArticle', 'the larger'),
        Rule('$SecondaryArticle', 'the second'),
        Rule('$SecondaryArticle', 'a larger'),
        Rule('$SecondaryArticle', 'a second'),

        Rule('$Var', '$TertiaryArticle ?$NumberDescriptor ?$Number',
             varname(2)),
        Rule('$TertiaryArticle', 'the largest'),
        Rule('$TertiaryArticle', 'the greatest'),
        Rule('$TertiaryArticle', 'the third'),
        Rule('$TertiaryArticle', 'a largest'),
        Rule('$TertiaryArticle', 'a third'),

        # The selector semantics is an index into the expression list.
        Rule('$Expr', '$Selector $ExprList', lambda sems: sems[1][sems[0]]),
        Rule('$Selector', 'the smallest of', 0),
        Rule('$Selector', 'the largest of', -1),
    ])

    # Add in a class called '$Junk' for words that don't matter
    # Vocab.txt contains all the vocab used in grammar
    with open('vocab.txt') as f:
        for line in f:
            word = line.strip()
            # Skip blank lines: they would otherwise be registered as a
            # '$Junk' rule whose right-hand side is the empty string.
            if word:
                rules.append(Rule('$Junk', word))
    return rules
class ArithmeticDomain(Domain):
    """Toy arithmetic domain with symbolic, tuple-shaped semantics.

    Semantics are either a number or a tuple (operator, operand, ...), where
    the operator is one of the keys of `ops`.  `execute` evaluates such a
    tree to its numeric denotation.
    """

    def train_examples(self):
        """Return the hand-written training examples."""
        cases = (
            ("one plus one", ('+', 1, 1), 2),
            ("one plus two", ('+', 1, 2), 3),
            ("one plus three", ('+', 1, 3), 4),
            ("two plus two", ('+', 2, 2), 4),
            ("two plus three", ('+', 2, 3), 5),
            ("three plus one", ('+', 3, 1), 4),
            ("three plus minus two", ('+', 3, ('~', 2)), 1),
            ("two plus two", ('+', 2, 2), 4),
            ("three minus two", ('-', 3, 2), 1),
            ("minus three minus two", ('-', ('~', 3), 2), -5),
            ("two times two", ('*', 2, 2), 4),
            ("two times three", ('*', 2, 3), 6),
            ("three plus three minus two", ('-', ('+', 3, 3), 2), 4),
        )
        return [
            Example(input=text, semantics=sem, denotation=value)
            for text, sem, value in cases
        ]

    def test_examples(self):
        """Return the hand-written test examples."""
        cases = (
            ("minus three", ('~', 3), -3),
            ("three plus two", ('+', 3, 2), 5),
            ("two times two plus three", ('+', ('*', 2, 2), 3), 7),
            ("minus four", ('~', 4), -4),
        )
        return [
            Example(input=text, semantics=sem, denotation=value)
            for text, sem, value in cases
        ]

    def dev_examples(self):
        """Return the module-level `arithmetic_dev_examples`."""
        return arithmetic_dev_examples

    # Number words one..four map to the integers 1..4.
    numeral_rules = [
        Rule('$E', word, value)
        for value, word in enumerate(('one', 'two', 'three', 'four'), 1)
    ]

    # '~' is unary negation, distinct from binary '-'; both are spelled
    # 'minus' in the input.
    operator_rules = [
        Rule('$UnOp', 'minus', '~'),
        Rule('$BinOp', 'plus', '+'),
        Rule('$BinOp', 'minus', '-'),
        Rule('$BinOp', 'times', '*'),
    ]

    compositional_rules = [
        Rule('$E', '$UnOp $E', lambda parts: (parts[0], parts[1])),
        # $EBO holds (operator, left operand) until the right one arrives.
        Rule('$EBO', '$E $BinOp', lambda parts: (parts[1], parts[0])),
        Rule('$E', '$EBO $E',
             lambda parts: (parts[0][0], parts[0][1], parts[1])),
    ]

    def rules(self):
        """Return numeral, operator and compositional rules as one new list."""
        combined = list(self.numeral_rules)
        combined += self.operator_rules
        combined += self.compositional_rules
        return combined

    def operator_precedence_features(self, parse):
        """
        Traverses the arithmetic expression tree which forms the semantics of
        the given parse and adds a feature (op1, op2) whenever op1 appears
        directly below op2 in the tree (i.e. with higher precedence than op2)
        and the two operators differ.
        """
        counts = defaultdict(float)

        def visit(node):
            # Leaves (numbers) contribute nothing.
            if not isinstance(node, tuple):
                return
            parent_op = node[0]
            for operand in node[1:]:
                visit(operand)
                if isinstance(operand, tuple) and operand[0] != parent_op:
                    counts[(operand[0], parent_op)] += 1.0

        visit(parse.semantics)
        return counts

    def features(self, parse):
        """Return rule features for `parse` plus the precedence features."""
        combined = rule_features(parse)
        combined.update(self.operator_precedence_features(parse))
        return combined

    def weights(self):
        """Return hand-set weights over operator-precedence features."""
        weights = defaultdict(float)
        weights.update({
            # '*' or '~' nested under '+' or '-' is rewarded ...
            ('*', '+'): 1.0,
            ('*', '-'): 1.0,
            ('~', '+'): 1.0,
            ('~', '-'): 1.0,
            # ... and the opposite nesting is penalised.
            ('+', '*'): -1.0,
            ('-', '*'): -1.0,
            ('+', '~'): -1.0,
            ('-', '~'): -1.0,
        })
        return weights

    def grammar(self):
        """Build a Grammar over `rules()` with '$E' as the start symbol."""
        all_rules = self.rules()
        return Grammar(rules=all_rules, start_symbol='$E')

    # Operator symbol -> implementation used by `execute`.
    ops = {
        '~': lambda a: -a,
        '+': lambda a, b: a + b,
        '-': lambda a, b: a - b,
        '*': lambda a, b: a * b,
    }

    def execute(self, semantics):
        """Evaluate a semantics tree; non-tuples are returned unchanged."""
        if not isinstance(semantics, tuple):
            return semantics
        apply_op = self.ops[semantics[0]]
        operands = [self.execute(operand) for operand in semantics[1:]]
        return apply_op(*operands)

    def training_metric(self):
        """Return a fresh DenotationAccuracyMetric."""
        return DenotationAccuracyMetric()
# This means that you can't treat `point` like the other binary operators in
# your syntactic grammar.  You will need to add special rules to handle the
# internal structure of these decimal numbers.

# In[ ]:

from arithmetic import ArithmeticDomain
from parsing import Rule, add_rule

# Start from a fresh arithmetic grammar.  Remove this if you want your
# question 1 extension to combine with these extensions:
math_domain = ArithmeticDomain()
math_grammar = math_domain.grammar()

# Remember to add these rules to the grammar!
# Both lists map the words 'one'..'four' to 1..4 under different categories.
integer_rules = [
    Rule('$I', word, value)
    for value, word in enumerate(('one', 'two', 'three', 'four'), 1)
]

tens_rules = [
    Rule('$T', word, value)
    for value, word in enumerate(('one', 'two', 'three', 'four'), 1)
]

# Add the above rules to math_grammar:

# Add rules to the grammar for using the above:
from parsing import Grammar, Rule

# Rule list for a small Chinese-language grammar over people, songs and
# relations.  It is assembled in sections; the overall rule order is what
# gets handed to Grammar.
rules = []

# Root and type lifting: a query is one or two $Type constituents, and the
# root semantics is the list of their semantics.
rules += [
    Rule('$ROOT', '$Type ?$Type', lambda parts: parts),
    Rule('$Type', '$Person', lambda parts: parts[0]),
    Rule('$Type', '$Song', lambda parts: parts[0]),
]

# Entity lexicon.
rules += [
    Rule('$Person', '谢霆锋', '谢霆锋'),
    Rule('$Person', '谢贤', '谢贤'),
    Rule('$Song', '歌唱祖国', '歌唱祖国'),
    Rule('$Loction', '香港', '香港'),
    Rule('$Person', '人', 'who'),
]

# Person modifiers.  An earlier variant of the first rule is kept for
# reference:
# Rule('$Person','$Loction $Person', lambda sems: (sems[1],"born("+sems[1]+") = " + sems[0])),
rules += [
    Rule('$Person', '$Loction $Person', lambda parts: (parts[0], parts[1])),
    Rule('$Which', '哪个', '哪个'),
    Rule('$Person', '$Which $Person', lambda parts: parts[1]),
]

# Relations: $Relation wraps the relation name in a function that pairs it
# with the argument it is later applied to.
rules += [
    Rule('$Relation', '$FwdRelation',
         lambda parts: (lambda arg: (parts[0], arg))),
    Rule('$FwdRelation', '父亲', '父亲'),
    Rule('$FwdRelation', '儿子', '儿子'),
    Rule('$FwdRelation', '老公', '老公'),
    Rule('$FwdRelation', '歌曲', '歌曲'),
    Rule('$FwdRelation', '唱 的', '歌曲'),
    Rule('$De', '的', '的'),
    Rule('$Person', '谁', 'who'),
]

# Equality and relation application.  An earlier, $Person-only variant of
# the last rule is kept for reference:
# Rule('$Person','$Person $Relation', lambda sems: sems[1](sems[0]) )
rules += [
    Rule('$Equal', '是', 'Equal'),
    Rule('$Type', '$Type $Equal $Type',
         lambda parts: (parts[1], parts[0], parts[2])),
    Rule('$Type', '$Type ?$De $Relation', lambda parts: parts[2](parts[0])),
]

grammar = Grammar(rules=rules)
class TravelDomain(Domain):
    """Domain for travel queries (origin, destination, mode, request type).

    The grammar is assembled from several class-level rule groups; `rules()`
    concatenates them in a fixed order.  Location spans are supplied by a
    GeoNamesAnnotator created once per domain instance.
    """

    def __init__(self):
        self.geonames_annotator = GeoNamesAnnotator()

    def train_examples(self):
        """Return the module-level `travel_train_examples`."""
        return travel_train_examples

    def dev_examples(self):
        """Return the module-level `travel_dev_examples`."""
        return travel_dev_examples

    def test_examples(self):
        """Return the module-level `travel_test_examples`."""
        return travel_test_examples

    # Define the basic structure of a $TravelQuery.
    # A $TravelQuery is a sequence of one or more $TravelQueryElements.
    # A $TravelQueryElement is either a $TravelLocation or a $TravelArgument.
    # EXERCISE: This approach permits any number of $FromLocations and
    # $ToLocations.  Find a way to require that (a) there is at least one
    # location, (b) there are not multiple $FromLocations or $ToLocations.
    rules_travel = [
        Rule('$ROOT', '$TravelQuery', sems_0),
        Rule('$TravelQuery', '$TravelQueryElements',
             lambda parts: merge_dicts({'domain': 'travel'}, parts[0])),
        Rule('$TravelQueryElements',
             '$TravelQueryElement ?$TravelQueryElements',
             lambda parts: merge_dicts(parts[0], parts[1])),
        Rule('$TravelQueryElement', '$TravelLocation', sems_0),
        Rule('$TravelQueryElement', '$TravelArgument', sems_0),
    ]

    # Define query elements which specify the origin or destination.
    rules_travel_locations = [
        Rule('$TravelLocation', '$ToLocation', sems_0),
        Rule('$TravelLocation', '$FromLocation', sems_0),
        Rule('$ToLocation', '$To $Location',
             lambda parts: {'destination': parts[1]}),
        Rule('$FromLocation', '$From $Location',
             lambda parts: {'origin': parts[1]}),
        Rule('$To', 'to'),
        Rule('$From', 'from'),
    ]

    # Allow travel arguments which specify the mode of travel.
    # Raises oracle accuracy to ~20%.
    # All lexical items are either obvious or attested in training data.
    rules_travel_modes = [
        Rule('$TravelArgument', '$TravelMode', sems_0),
        Rule('$TravelMode', '$AirMode', {'mode': 'air'}),
        Rule('$TravelMode', '$BikeMode', {'mode': 'bike'}),
        Rule('$TravelMode', '$BoatMode', {'mode': 'boat'}),
        Rule('$TravelMode', '$BusMode', {'mode': 'bus'}),
        Rule('$TravelMode', '$CarMode', {'mode': 'car'}),
        Rule('$TravelMode', '$TaxiMode', {'mode': 'taxi'}),
        Rule('$TravelMode', '$TrainMode', {'mode': 'train'}),
        Rule('$TravelMode', '$TransitMode', {'mode': 'transit'}),
    ] + [
        # Lexical triggers, grouped by the mode category they introduce.
        Rule(category, phrase)
        for category, phrases in (
            ('$AirMode', (
                'air fare', 'air fares', 'airbus', 'airfare', 'airfares',
                'airline', 'airlines', '?by air', 'flight', 'flights',
                'fly',
            )),
            ('$BikeMode', ('?by bike', 'bike riding')),
            ('$BoatMode', (
                '?by boat', 'cruise', 'cruises', 'norwegian cruise lines',
            )),
            ('$BusMode', (
                '?by bus', 'bus tours', 'buses', 'shutle', 'shuttle',
            )),
            ('$CarMode', ('?by car', 'drive', 'driving', 'gas')),
            ('$TaxiMode', ('cab', 'car service', 'taxi')),
            ('$TrainMode', ('?by train', 'trains', 'amtrak')),
            ('$TransitMode', (
                '?by public transportation', '?by ?public transit',
            )),
        )
        for phrase in phrases
    ]

    # Allow arguments which indicate travel without specifying a mode.
    # Adds roughly 4% in oracle accuracy.
    rules_travel_triggers = [
        Rule('$TravelArgument', '$TravelTrigger', {}),
    ] + [
        # All of the following lexical rules are obvious or are based on
        # inspection of training data -- not inspection of test data!
        Rule('$TravelTrigger', phrase)
        for phrase in (
            'tickets', 'transportation', 'travel', 'travel packages', 'trip',
        )
    ]

    # Allow travel arguments which specify the type of information requested.
    rules_request_types = [
        Rule('$TravelArgument', '$RequestType', sems_0),
        Rule('$RequestType', '$DirectionsRequest', {'type': 'directions'}),
        Rule('$RequestType', '$DistanceRequest', {'type': 'distance'}),
        Rule('$RequestType', '$ScheduleRequest', {'type': 'schedule'}),
        Rule('$RequestType', '$CostRequest', {'type': 'cost'}),
        Rule('$DirectionsRequest', 'directions'),
        Rule('$DirectionsRequest', 'how do i get'),
        Rule('$DistanceRequest', 'distance'),
        Rule('$ScheduleRequest', 'schedule'),
        Rule('$CostRequest', 'cost'),
    ]

    # Allow optional words around travel query elements.
    rules_optionals = [
        # EXERCISE: These rules introduce some spurious ambiguity.  Figure
        # out why, and propose a way to avoid or minimize the spurious
        # ambiguity.
        Rule('$TravelQueryElement', '$TravelQueryElement $Optionals', sems_0),
        Rule('$TravelQueryElement', '$Optionals $TravelQueryElement', sems_1),
        Rule('$Optionals', '$Optional ?$Optionals'),
        Rule('$Optional', '$Show'),
        Rule('$Optional', '$Modifier'),
        Rule('$Optional', '$Carrier'),
        Rule('$Optional', '$Stopword'),
        Rule('$Optional', '$Determiner'),
    ] + [
        Rule(category, phrase)
        for category, phrases in (
            ('$Show', ('book', 'give ?me', 'show ?me')),
            ('$Modifier', (
                'cheap', 'cheapest', 'discount', 'honeymoon', 'one way',
                'direct', 'scenic', 'transatlantic', 'one day',
                'last minute',
            )),
            ('$Carrier', (
                'delta', 'jet blue', 'spirit airlines', 'amtrak',
            )),
            ('$Stopword', ('all', 'of', 'what', 'will', 'it', 'to')),
            ('$Determiner', ('a', 'an', 'the')),
        )
        for phrase in phrases
    ]

    # Allow any query to be parsed as a non-travel query.
    rules_not_travel = [
        Rule('$ROOT', '$NotTravelQuery', sems_0),
        Rule('$NotTravelQuery', '$Text', {'domain': 'other'}),
        Rule('$Text', '$Token ?$Text'),
    ]

    def rules(self):
        """Return every rule group concatenated into one new list."""
        groups = (
            # semantics oracle accuracy after adding each group
            self.rules_travel,            # 0% train, 0% test
            self.rules_travel_locations,  # 0% train, 0% test
            self.rules_travel_modes,      # 13% train, 4% test
            self.rules_travel_triggers,   # 17% train, 12% test
            self.rules_request_types,     # 20% train, 16% test
            self.rules_optionals,         # 40% train, 20% test
            self.rules_not_travel,        # 57% train, 48% test
        )
        combined = []
        for group in groups:
            combined = combined + group
        return combined

    def annotators(self):
        """Return a new TokenAnnotator plus this instance's GeoNames annotator."""
        return [TokenAnnotator(), self.geonames_annotator]

    def grammar(self):
        """Build a Grammar from `rules()` and `annotators()`."""
        all_rules = self.rules()
        return Grammar(rules=all_rules, annotators=self.annotators())

    def features(self, parse):
        """Return `rule_features(parse)`."""
        return rule_features(parse)

    def metrics(self):
        """Return the semantics-match metrics plus a HasTravelParseMetric."""
        base_metrics = semantics_match_metrics()
        return base_metrics + [HasTravelParseMetric()]
def rules(self):
    """Return rules matching a $Location with optional tokens on either side.

    The root semantics is produced by `sems_1`; `$Optionals` is a
    right-recursive run of `$Optional`, each of which is a single `$Token`.
    """
    location_in_context = Rule(
        '$ROOT', '?$Optionals $Location ?$Optionals', sems_1)
    optional_run = Rule('$Optionals', '$Optional ?$Optionals')
    single_token = Rule('$Optional', '$Token')
    return [location_in_context, optional_run, single_token]
from parsing import parse_input, Grammar, Rule, Parse
from annotators import *
from operator import itemgetter


def merge_dicts(*dicts):
    """Merge the given mappings into one new dict, left to right.

    Falsy arguments (None, empty dicts) are skipped, so optional semantics
    can be passed without a guard.  Later arguments override earlier ones
    on key collisions.  The inputs are not modified.
    """
    result = dict()
    for dct in dicts:
        if not dct:
            continue
        result.update(dct)
    return result


# Grammar rules for a "declare ..." request: a declaration verb followed by
# one to four $DeclarationElement constituents whose dict semantics are
# merged and tagged with {'request': 'declare'}.
decl_rules = [
    Rule('$ROOT', '$Declare $Declaration',
         lambda sems: merge_dicts({'request': 'declare'}, sems[1])),
    Rule('$Declare', 'declare', itemgetter(0)),
    Rule('$Declare', 'create', itemgetter(0)),
    Rule('$Declare', 'define', itemgetter(0)),
    # A plain lambda is used instead of itemgetter(0).
    # NOTE(review): a parser that only applies semantics which are
    # types.FunctionType instances would treat an itemgetter object as a
    # constant -- confirm against the parsing module.
    Rule('$Declaration', '$DeclarationElement', lambda sems: sems[0]),
    Rule('$Declaration', '$DeclarationElement $DeclarationElement',
         lambda sems: merge_dicts(sems[0], sems[1])),
    Rule('$Declaration',
         '$DeclarationElement $DeclarationElement $DeclarationElement',
         lambda sems: merge_dicts(sems[0], sems[1], sems[2])),
    # The right-hand side is built with adjacent-literal concatenation; the
    # previous backslash inside the string literal corrupted the token list.
    Rule(
        '$Declaration',
        '$DeclarationElement $DeclarationElement '
        '$DeclarationElement $DeclarationElement',
        lambda sems: merge_dicts(sems[0], sems[1], sems[2], sems[3])),
]