Example #1
def build_user_utterance(
        text: str, utterance_tokenizer: UtteranceTokenizer) -> UserUtterance:
    """Builds a UserUtterance from raw text, tokenizing it after cleanup.

    Falls back to the NULL placeholder when the cleaned text is empty.
    """
    text = clean_utterance_text(text)
    if not text:
        return UserUtterance(original_text=SpecialStrings.NULL,
                             tokens=[SpecialStrings.NULL])
    return UserUtterance(original_text=text,
                         tokens=utterance_tokenizer.tokenize(text))
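A minimal usage sketch for build_user_utterance, assuming UtteranceTokenizer, UserUtterance, SpecialStrings, and clean_utterance_text come from the SMCalFlow dataflow package; the import path below is an assumption and may need adjusting to the actual module layout.

# Assumed import path; build_user_utterance is the function defined above.
from dataflow.core.utterance_tokenizer import UtteranceTokenizer

tokenizer = UtteranceTokenizer()
utterance = build_user_utterance("Reschedule meeting with Barack Obama", tokenizer)
print(utterance.original_text)  # cleaned text
print(utterance.tokens)         # e.g. ["Reschedule", "meeting", "with", "Barack", "Obama"]

# An empty string yields the NULL placeholders.
empty = build_user_utterance("", tokenizer)
assert empty.tokens == [SpecialStrings.NULL]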
Example #2
def build_agent_utterance(text: str, utterance_tokenizer: UtteranceTokenizer,
                          described_entities: List[str]) -> AgentUtterance:
    """Builds an AgentUtterance from raw text, tokenizing it after cleanup.

    Falls back to the NULL placeholder when the cleaned text is empty;
    described_entities is passed through unchanged.
    """
    text = clean_utterance_text(text)
    if not text:
        return AgentUtterance(
            original_text=SpecialStrings.NULL,
            tokens=[SpecialStrings.NULL],
            described_entities=described_entities,
        )
    return AgentUtterance(
        original_text=text,
        tokens=utterance_tokenizer.tokenize(text),
        described_entities=described_entities,
    )
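A corresponding sketch for build_agent_utterance. The described_entities argument is assumed to carry the entity names mentioned in the agent's reply; the values below are hypothetical.

# Reuses the tokenizer and the function defined above.
tokenizer = UtteranceTokenizer()
reply = build_agent_utterance(
    "I have rescheduled your meeting.",
    tokenizer,
    described_entities=["Barack Obama"],  # hypothetical entity list
)
print(reply.tokens)              # tokenized agent reply
print(reply.described_entities)  # ["Barack Obama"]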
def test_tokenize_utterance():
    utterance_tokenizer = UtteranceTokenizer()

    data = [
        (
            "Reschedule meeting with Barack Obama to 5/30/2019 at 3:00pm",
            [
                "Reschedule",
                "meeting",
                "with",
                "Barack",
                "Obama",
                "to",
                "5",
                "/",
                "30",
                "/",
                "2019",
                "at",
                "3",
                ":",
                "00",
                "pm",
            ],
        ),
        (
            "Can you also add icecream birthday tomorrow at 6PM?",
            [
                "Can",
                "you",
                "also",
                "add",
                "icecream",
                "birthday",
                "tomorrow",
                "at",
                "6",
                "PM",
                "?",
            ],
        ),
    ]
    for text, expected in data:
        assert utterance_tokenizer.tokenize(text) == expected
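As the expected token lists above show, UtteranceTokenizer splits date and time expressions on punctuation ("5/30/2019" becomes "5", "/", "30", "/", "2019"; "3:00pm" becomes "3", ":", "00", "pm"; "6PM" becomes "6", "PM") while keeping compound words such as "icecream" as single tokens.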