示例#1
0
    def __init__(self, *args, **kwargs):
        """
        Initialize the TestCase parent, then build and compile this template's regexes.

        Note that we can't change the signature of __init__ because we're inheriting from TestCase.
        TestCase needs to be able to init this class with its expected args/kwargs.

        When the instance is of a subclass of RegexTemplate, this:
          1. checks that slots, templates, positive_examples and negative_examples are not None;
          2. converts every list-valued slot to a single "OR" regex string via oneof();
          3. wraps every slot regex in a named group and substitutes it into each template,
             adding the ^ and $ anchors;
          4. compiles the resulting regexes into self.compiled_regexes (same order as self.templates).

        The slots dictionary the instance started with is not modified; the normalized
        dictionary is bound to self.slots on the instance.

        Raises:
            AssertionError: if one of the four required attributes is None, or if a slot value
                is neither a string nor a list.
        """
        logger.debug(
            f'RegexTemplate ({type(self).__name__}) is starting __init__...')
        t0 = time.perf_counter_ns()
        super().__init__(*args, **kwargs)  # init TestCase parent class
        time_for_parent_init = time.perf_counter_ns() - t0

        # Only need to do the rest of initializing if we're initializing a subclass of RegexTemplate.
        # Identity comparison on purpose: isinstance() would also be true for every subclass.
        if type(self) is RegexTemplate:
            return

        # Check that slots, templates, positive_examples and negative_examples have been defined by the subclass
        assert self.slots is not None, 'self.slots should not be None. It should be defined as a class constant in the class inheriting from RegexTemplate.'
        assert self.templates is not None, 'self.templates should not be None. It should be defined as a class constant in the class inheriting from RegexTemplate.'
        assert self.positive_examples is not None, 'self.positive_examples should not be None. It should be defined as a class constant in the class inheriting from RegexTemplate.'
        assert self.negative_examples is not None, 'self.negative_examples should not be None. It should be defined as a class constant in the class inheriting from RegexTemplate.'

        # Convert lists of strings to "OR" regex strings. The result goes into a fresh dictionary
        # so that the dictionary being iterated (shared by every instance when it is a class
        # constant) is never written to.
        normalized_slots = {}
        for name, value in self.slots.items():
            if isinstance(value, list):
                normalized_slots[name] = oneof(value)
            else:
                assert isinstance(
                    value, str
                ), f"The values in the slots dictionary should be either strings or lists of strings, not {type(value)}"
                normalized_slots[name] = value
        self.slots = normalized_slots  # instance attribute; shadows the class constant

        # Dictionary from slot name -> regex for that slot in a named group
        # e.g. 'my_name_is' -> '(?P<my_name_is>my name is|call me|i'm called)'
        slot_name_to_regex_group = {
            slot_name: "(?P<{}>{})".format(slot_name, slot_regex)
            for slot_name, slot_regex in normalized_slots.items()
        }

        # For each template, replace each {slot_name} with its regex. Also add start and end characters (^ and $).
        # e.g. '{my_name_is} {name}' -> '^(?P<my_name_is>my name is|call me|i'm called) (?P<name>.+?)$'
        regexes = [
            '^' + template.format(**slot_name_to_regex_group) + '$'
            for template in self.templates
        ]

        # Compile the regexes, timing the compilation as a whole
        t0_compile = time.perf_counter_ns()
        self.compiled_regexes = [re.compile(regex) for regex in regexes]
        time_to_compile = time.perf_counter_ns() - t0_compile

        logger.debug(
            f'RegexTemplate ({type(self).__name__}) finished __init__, compiling {len(self.compiled_regexes)} regexes. '
            f'Took {(time.perf_counter_ns()-t0)/10**9} seconds total, of which {time_for_parent_init/10**9} seconds were for TestCase.__init__ '
            f'and {time_to_compile/10**9} seconds were for re.compile.')
示例#2
0
class PositiveNavigationTemplate(RegexTemplate):
    """
    Template for utterances in which the user expresses a positive navigational intent, such as
    "i want to talk about X", "i want to talk about", "i want to talk".

    Slots:
        nav:       one or more space-separated POSITIVE_NAVIGATION phrases.
        nav_about: one or more space-separated "<POSITIVE_NAVIGATION phrase> about" phrases.
        topic:     NONEMPTY_TEXT.

    Both templates allow optional leading text (OPTIONAL_TEXT_PRE) and an optional trailing
    " <topic>"; the "about" variant is listed first.
    """
    slots = {
        'nav': one_or_more_spacesep(POSITIVE_NAVIGATION),
        'nav_about': one_or_more_spacesep([f'{oneof(POSITIVE_NAVIGATION)} about']),
        'topic': NONEMPTY_TEXT,
    }

    templates = [
        OPTIONAL_TEXT_PRE + "{nav_about}( {topic})?",
        OPTIONAL_TEXT_PRE + "{nav}( {topic})?",
    ]

    # Each entry is (utterance, {slot name: expected text for that slot}).
    # Presumably consumed by tests defined on RegexTemplate - confirm against the base class.
    positive_examples = [
        ("alexa can you please talk something else", {"nav": "can you please talk", "topic": "something else"}),
        ("tell me jokes", {"nav": "tell me", "topic": "jokes"}),
        ("tell me a bit", {"nav": "tell me a bit"}),
        ("talk about zebras", {"nav_about": "talk about", "topic": "zebras"}),
        ("talk to me", {"nav": "talk to me"}),
        ("let's talk about zebras", {"nav_about": "let's talk about", "topic": "zebras"}),
        ("no i wanna learn about microbiology", {"nav_about": "i wanna learn about", "topic": "microbiology"}),
        ("can we chat about", {"nav_about": "we chat about"}),
        ("sure let's talk about it", {"nav_about": "let's talk about", "topic": "it"}),
        ("can you talk about life", {"nav_about": "you talk about", "topic": "life"}),
        ("can you tell me about golden buddha", {"nav_about": "you tell me about", "topic": "golden buddha"}),
        ("talk more about it please", {"nav_about": "talk more about", "topic": "it please"}),
        ("tell me a story about mexico", {"nav": "tell me a", "topic": "story about mexico"}),
        ("i need to talk about you", {"nav_about": "i need to talk about", "topic": "you"}),
        # NOTE(review): same utterance and expectation as the "talk to me" entry earlier in this list.
        ("talk to me", {"nav": "talk to me"}),
        ("i would like to talk to you about cern", {"nav_about": "i would like to talk to you about", "topic": "cern"}),
        ("talk to me about ash wednesday", {"nav_about": "talk to me about", "topic": "ash wednesday"}),
        ("no tell me about your childhood", {"nav_about": "no tell me about", "topic": "your childhood"}),
        (
            "i don't want to talk about zebras i want to talk about giraffes",
            {"nav_about": "i want to talk about", "topic": "giraffes"},
        ),
        (
            "tell me about giraffes i don't want to talk about zebras",
            {"nav_about": "tell me about", "topic": "giraffes i don't want to talk about zebras"},
        ),
        ("i'm interested in spiders", {"nav": "i'm interested in", "topic": "spiders"}),
        ("i am interested in sports", {"nav": "i am interested in", "topic": "sports"}),
        ("actually i'm really into arachnids", {"nav": "i'm really into", "topic": "arachnids"}),
        ("hey what do you know about whales", {"nav_about": "do you know about", "topic": "whales"}),
        ("do you know anything about whales", {"nav_about": "do you know anything about", "topic": "whales"}),
        (
            "alexa do you know much i'm not sure if you do but yeah anything about whales",
            {
                "nav_about": "do you know much i'm not sure if you do but yeah anything about",
                "topic": "whales",
            },
        ),
        ("do you know whales live a hundred years", {"nav": "do you know", "topic": "whales live a hundred years"}),
        ("have you ever heard of ariel osbourne", {"nav": "you ever heard of", "topic": "ariel osbourne"}),
        ("what do you think about veganism", {"nav_about": "what do you think about", "topic": "veganism"}),
        (
            "let's switch topics what do you think is going to happen",
            {"nav": "what do you think", "topic": "is going to happen"},
        ),
    ]

    # Utterances listed as examples this template should not match.
    negative_examples = [
        "into politics",
        "why do you want to talk about zebras",
        "stop talk about zebras",
        "don't talk about zebras",
        "i'm not interested in spiders",
        "why are you interested in spiders",
        "do you think i'm cool",
        "i haven't heard of it",
        "do you think so",
    ]
示例#3
0
        'mostly',
        'here',
        'might',
        'go',
        'ahead',
        'gonna',
        'did',
        'oh',
        'actually',
        'trying',
    ] + INTENSIFIERS + CONVERSANTS + POSNAV_ANCHORS))

# Regex for a "talk phrase": one or more TALK words are mandatory; everything else is optional.
# Words from TALK_PHRASE_PRECEDERS may come before, and things like "to me more about" may follow.
TALK_PHRASE = (
    "({preceder} )*"            # any number of preceder words, each followed by a space
    "{talk}"                    # the TALK word(s) themselves
    "( (to|with) {listener})*"  # e.g. " to <conversant>" / " with <conversant>"
    "( {listener})*"            # a bare conversant with no preposition
    "( (more|a|bit|little))*"   # trailing quantity words
).format(
    preceder=oneof(TALK_PHRASE_PRECEDERS),
    talk=one_or_more_spacesep(TALK),
    listener=oneof(CONVERSANTS),
)

# Regex for expressing interest: any number of intensifiers (each followed by a space), then
# "interested in" or "into".
# Only the intensifier placeholder exists in the template, so that is the only value supplied;
# str.format() ignores unused keyword arguments, so the resulting string is unchanged.
INTERESTED_IN = "({intensifier} )*(interested in|into)".format(
    intensifier=oneof(INTENSIFIERS))

POSITIVE_NAVIGATION = [

    # This posnav template needs a posnav anchor (like "let's", "i", "can"), then one or more talk_phrases.
    # Anything can precede except "why do"
    "(?<!why do ){anchor} {talk_phrase}".format(
        anchor=one_or_more_spacesep(POSNAV_ANCHORS),
        talk_phrase=one_or_more_spacesep([TALK_PHRASE])),
示例#4
0
class NegativeNavigationTemplate(RegexTemplate):
    """
    Template for utterances in which the user expresses a negative navigational intent, such as
    "i don't want to talk about X", "i don't want to talk about", "i don't want to talk",
    "change the subject".

    Slots:
        change_the_subject: "change"/"new", an optional "the", then "subject"/"category"/"topic";
                            the lookbehind rejects a match directly preceded by "don't ".
        nav:                one or more space-separated NEGATIVE_NAVIGATION phrases.
        nav_about:          one or more space-separated "<NEGATIVE_NAVIGATION phrase> about" phrases.
        topic:              NONEMPTY_TEXT.

    The change_the_subject template allows optional text on both sides; the two nav templates
    allow optional leading text and an optional trailing " <topic>".
    """
    slots = {
        'change_the_subject': "(?<!don't )(change|new) (the )?(subject|category|topic)",
        'nav': one_or_more_spacesep(NEGATIVE_NAVIGATION),
        'nav_about': one_or_more_spacesep([f'{oneof(NEGATIVE_NAVIGATION)} about']),
        'topic': NONEMPTY_TEXT,
    }

    templates = [
        OPTIONAL_TEXT_PRE + "{change_the_subject}" + OPTIONAL_TEXT_POST,
        OPTIONAL_TEXT_PRE + "{nav_about}( {topic})?",
        OPTIONAL_TEXT_PRE + "{nav}( {topic})?",
    ]

    # Each entry is (utterance, {slot name: expected text for that slot}).
    # Presumably consumed by tests defined on RegexTemplate - confirm against the base class.
    positive_examples = [
        # "change the subject" family
        ("change the subject", {"change_the_subject": "change the subject"}),
        ("change the topic", {"change_the_subject": "change the topic"}),
        ("new subject", {"change_the_subject": "new subject"}),
        ("can we change the category", {"change_the_subject": "change the category"}),
        ("change subject", {"change_the_subject": "change subject"}),
        ("alexa change the subject please", {"change_the_subject": "change the subject"}),
        # "<negative navigation> about [topic]"
        ("stop talking about", {"nav_about": "stop talking about"}),
        ("stop talking about movies", {"nav_about": "stop talking about", "topic": "movies"}),
        ("oh my god please stop talking about movies", {"nav_about": "stop talking about", "topic": "movies"}),
        # "<negative navigation> [topic]"
        (
            "i don't think i wanna talk to you anymore",
            {"nav": "don't think i wanna talk to you", "topic": "anymore"},
        ),
        ("stop talking", {"nav": "stop talking"}),
        ("i'm not interested in spiders", {"nav": "not interested in", "topic": "spiders"}),
        (
            "we were never really interested in spiders",
            {"nav": "never really interested in", "topic": "spiders"},
        ),
    ]

    # Utterances listed as examples this template should not match.
    negative_examples = [
        "talk about movies",
        "don't change the subject",
        "no don't change the subject",
        "why don't you want to talk about it",
        "i'm interested in spiders",
        "why are you interested in spiders",
    ]
示例#5
0
    'well',
    'positive',
    'incredible',
    'upbeat',
]

# Regex fragments listed as "bad" phrases. All entries are plain words except one pattern.
BAD_PHRASES = [
    'bad', 'sad', 'horrible', 'terrible',
    'depressed', 'negative', 'upset', 'crummy',
    # "not", any number of softeners (too/very/so/that), then any one of GOOD_PHRASES
    'not ((too|very|so|that) )*{}'.format(oneof(GOOD_PHRASES)),
    'down',
]

# Regex fragments listed as "neutral" phrases.
NEUTRAL_PHRASES = [
    # "not", any number of softeners (too/very/so/that), then any one of BAD_PHRASES, e.g. "not too bad"
    'not ((too|very|so|that) )*{}'.format(oneof(BAD_PHRASES)),
    'okay',
    'ok',
    'fine',
    'all right',
    'alright',
    'normal',
    "i don't know",
    'better',
    'find',  # NOTE(review): possibly a deliberate catch for a mis-transcribed "fine" - confirm
]


class GoodTemplate(RegexTemplate):
    slots = {
        'good': one_or_more_spacesep(GOOD_PHRASES),
        'preceder': OPTIONAL_TEXT_PRE,
        'follower': OPTIONAL_TEXT_POST,
    }