def __init__(self, agent, kb, uuid):
    '''
    Dialogue data that is needed by the model.
    '''
    self.uuid = uuid
    self.agent = agent
    self.kb = kb
    self.role = kb.role
    partner_role = 'buyer' if self.role == 'seller' else 'seller'
    self.agent_to_role = {self.agent: self.role, 1 - self.agent: partner_role}
    # KB context
    # TODO: context_to_int will change category, title, description to integers
    self.category_str = kb.category
    self.category = kb.category
    self.title = tokenize(re.sub(r'[^\w0-9]', ' ', kb.facts['item']['Title']))
    self.description = tokenize(re.sub(r'[^\w0-9]', ' ', ' '.join(kb.facts['item']['Description'])))
    # token_turns: tokens and entities (output of entity linking)
    self.token_turns = []
    # turns: input tokens of the encoder, decoder input and target; later converted to integers
    self.turns = [[], [], []]
    # entities: same structure as turns; non-entity tokens are None
    self.entities = []
    self.agents = []
    self.roles = []
    self.is_int = False  # Whether we've converted the tokens to integers
    self.token_candidates = None
    self.candidates = None
    self.true_candidate_inds = None

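# Every snippet in this file assumes a shared tokenize() helper. Its exact
# behavior is project-specific (e.g. an NLTK-style word tokenizer in the cocoa
# snippets, custom regex rules in the nmt-chatbot ones), so the following is
# only a minimal sketch of the assumed interface, not the real implementation.
import re

def tokenize_sketch(utterance, lowercase=True):
    """Hypothetical stand-in for tokenize(): lowercase by default, then split
    into word and punctuation tokens."""
    if lowercase:
        utterance = utterance.lower()
    return re.findall(r"\w+|[^\w\s]", utterance)

# tokenize_sketch("I'll pay $50!") -> ['i', "'", 'll', 'pay', '$', '50', '!']
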
def process_event(self, e, kb, mentioned_entities=None, known_kb=True):
    '''
    Convert an event to two lists of tokens and entities for encoding and decoding.
    '''
    if e.action == 'message':
        # Lowercase, tokenize, link entities
        entity_tokens = self.lexicon.link_entity(
            tokenize(e.data), kb=kb,
            mentioned_entities=mentioned_entities, known_kb=known_kb)
        entity_tokens = [normalize_number(x) if not is_entity(x) else x
                         for x in entity_tokens]
        if entity_tokens:
            # NOTE: keep two copies because we might change them for decoding/encoding
            return (entity_tokens, copy.copy(entity_tokens))
        else:
            return None
    elif e.action == 'select':
        # Convert an item to an item id (relative to the speaker)
        item_id = self.get_item_id(kb, e.data)
        # Represent the item by its entities during encoding and by its item id during decoding
        return ([markers.SELECT] + self.item_to_entities(e.data, kb.attributes),
                [markers.SELECT, item_to_entity(item_id)])
    else:
        raise ValueError('Unknown event action.')

def test(self, c, d, raw_utterance, lexicon):
    scenario = {'book': c[0], 'hat': c[1], 'ball': c[2]}
    proposal, _, _ = self.parse_proposal(
        lexicon.link_entity(tokenize(raw_utterance)), scenario)
    if not proposal:
        print('No offer detected: {}'.format(raw_utterance))
        return False
    passed = True
    for i, item in enumerate(('book', 'hat', 'ball')):
        if proposal[self.ME][item] != d[i]:
            passed = False
            break
    if passed:
        print("Passed")
    else:
        print("TEST SCENARIO")
        print(" There are {0} books, {1} hats, and {2} balls.".format(c[0], c[1], c[2]))
        print(" Sentence: {0}".format(raw_utterance))
        print("SYSTEM OUTPUT")
        print('For me:')
        print(proposal[self.ME])
        print('For you:')
        print(proposal[self.YOU])
        print(" The correct proposal is {0} books, {1} hats, and {2} balls".format(d[0], d[1], d[2]))
        print("------------------------------")
    return passed

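# Hedged usage sketch for the harness above: `c` is the scenario's item counts
# and `d` is the expected split for the speaker ("me"). `parser` and `lexicon`
# are assumed to be an initialized proposal parser and entity lexicon; the
# names and the utterance are hypothetical.
# passed = parser.test(c=(2, 3, 1), d=(1, 0, 1),
#                      raw_utterance='i want one book and one ball',
#                      lexicon=lexicon)
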
def extract_keywords(self):
    if self.action == 'message':
        # Re-tokenize here because POS tagging is case-sensitive
        tags = pos_tag(tokenize(self.text, lowercase=False))
        # Keep nouns and adjectives as keywords
        self.keywords = [word for word, tag in tags if re.match(r'NN*|ADJ*', tag)]

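# Assuming pos_tag is NLTK's tagger (an assumption; the snippet does not pin
# down the tagset), this is roughly what the keyword filter sees:
# from nltk import pos_tag
# pos_tag(['The', 'red', 'bike', 'rides', 'well'])
# -> roughly [('The', 'DT'), ('red', 'JJ'), ('bike', 'NN'), ('rides', 'VBZ'), ('well', 'RB')]
# Caveat: with Penn Treebank tags, r'NN*' matches NN/NNS/NNP, but r'ADJ*' never
# matches JJ; the adjective pattern only fires under the universal tagset's ADJ.
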
def process_event(self, e, agent, sel=None):
    '''
    "sel" is short for selection: a list of two agents, where each agent
    is a dict with three keys: book, hat, ball. (The outcome is handled
    separately with its own loss function.)
    '''
    if e.action == 'message':
        # Lowercase, tokenize, link entities
        entity_tokens = self.lexicon.link_entity(tokenize(e.data))
        return entity_tokens if entity_tokens else None
    elif e.action == 'select':
        entity_tokens = [markers.SELECT]
        if e.agent == agent:
            entity_tokens.extend(['{count}'.format(count=sel[agent][item])
                                  for item in self.lexicon.items])
        return entity_tokens
    elif e.action == 'quit':
        return [markers.QUIT]
    else:
        raise ValueError('Unknown event action.')

def parse_message(self, event, dialogue_state):
    tokens = self.lexicon.link_entity(tokenize(event.data))
    utterance = Utterance(raw_text=event.data, tokens=tokens)
    intent = self.classify_intent(utterance)
    split = None
    proposal_type = None
    ambiguous_proposal = False
    if intent == 'propose':
        proposal, proposal_type, ambiguous_proposal = self.parse_proposal(
            utterance.tokens, self.kb.item_counts)
        if proposal:
            # NOTE: YOU/ME in the proposal is from the partner's perspective
            split = {self.agent: proposal[self.YOU],
                     self.partner: proposal[self.ME]}
            if (dialogue_state.partner_proposal and
                    split[self.partner] == dialogue_state.partner_proposal[self.partner]):
                intent = 'insist'
    lf = LF(intent, proposal=split, proposal_type=proposal_type)
    utterance.lf = lf
    utterance.template = self.extract_template(tokens, dialogue_state)
    utterance.ambiguous_template = ambiguous_proposal
    return utterance

def process_questions(questions, return_score_modifiers=False):

    # Make a list
    if not isinstance(questions, list):
        questions = [questions]

    # Clean and tokenize
    prepared_questions = []
    for question in questions:
        question = question.strip()
        prepared_questions.append(apply_bpe(tokenize(question)) if question else '##emptyquestion##')

    # Run inference
    answers_list = inference_helper(prepared_questions)

    # Process answers
    prepared_answers_list = []
    for index, answers in enumerate(answers_list):
        answers = detokenize(answers)
        answers = replace_in_answers(answers)
        answers = normalize_new_lines(answers)
        answers_score = score_answers(questions[index], answers)
        best_index, best_score = get_best_score(answers_score['score'])

        if prepared_questions[index] == '##emptyquestion##':
            prepared_answers_list.append(None)
        elif return_score_modifiers:
            prepared_answers_list.append({'answers': answers, 'scores': answers_score['score'],
                                          'best_index': best_index, 'best_score': best_score,
                                          'score_modifiers': answers_score['score_modifiers']})
        else:
            prepared_answers_list.append({'answers': answers, 'scores': answers_score['score'],
                                          'best_index': best_index, 'best_score': best_score})

    return prepared_answers_list

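# Usage sketch for the function above. The '##emptyquestion##' sentinel keeps
# question and answer indices aligned when an input is blank:
# results = process_questions('Hello, how are you?')
# results[0]['answers'][results[0]['best_index']]   # best-scoring answer text
# process_questions(['hi', ''])                     # -> [dict, None]
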
def process_questions(questions, include_blacklisted=True):
    # A variant of process_questions above: no BPE step, and scoring uses a
    # blacklist flag instead of score modifiers.

    # Make a list
    if not isinstance(questions, list):
        questions = [questions]

    # Clean and tokenize
    prepared_questions = []
    for question in questions:
        question = question.strip()
        prepared_questions.append(tokenize(question) if question else '##emptyquestion##')

    # Run inference
    answers_list = inference_helper(prepared_questions)

    # Process answers
    prepared_answers_list = []
    for index, answers in enumerate(answers_list):
        answers = detokenize(answers)
        answers = replace_in_answers(answers, 'answers')
        answers_score = score_answers(answers, 'answers')
        best_index, best_score = get_best_score(answers_score, include_blacklisted)

        if prepared_questions[index] == '##emptyquestion##':
            prepared_answers_list.append(None)
        else:
            prepared_answers_list.append({'answers': answers, 'scores': answers_score,
                                          'best_index': best_index, 'best_score': best_score})

    return prepared_answers_list

def process_event(self, e, kb):
    '''
    Tokenize, link entities.
    '''
    if e.action == 'message':
        # Lowercase, tokenize, link entities
        entity_tokens = self.lexicon.link_entity(tokenize(e.data), kb=kb,
                                                 scale=True, price_clip=4.)
        return entity_tokens if entity_tokens else None
    elif e.action == 'offer':
        data = e.data['price']
        if data is None:
            return None
        price = PriceScaler._scale_price(kb, data)
        return [markers.OFFER, self.price_to_entity(price)]
    elif e.action == 'quit':
        return [markers.QUIT]
    elif e.action == 'accept':
        return [markers.ACCEPT]
    elif e.action == 'reject':
        return [markers.REJECT]
    else:
        raise ValueError('Unknown event action.')

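# Sketch of the sequences this produces (marker names come from the project's
# `markers` namespace; the price-entity shape is assumed for illustration):
# message 'i can do 50'   -> ['i', 'can', 'do', <linked, scaled price entity>]
# offer {'price': 50}     -> [markers.OFFER, <scaled price entity>]
# accept / reject / quit  -> [markers.ACCEPT] / [markers.REJECT] / [markers.QUIT]
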
def get_total_tokens_per_agent(transcript):
    tokens = {0: 0., 1: 0.}
    for event in transcript["events"]:
        if event["action"] == "message":
            msg_tokens = tokenize(event["data"])
            tokens[event["agent"]] += len(msg_tokens)
    return tokens

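# Minimal usage sketch with a toy transcript dict (shape inferred from the code
# above: a list of events, each with "action", "agent", and "data"):
# transcript = {"events": [
#     {"action": "message", "agent": 0, "data": "hi, is the bike available?"},
#     {"action": "message", "agent": 1, "data": "yes it is"},
# ]}
# get_total_tokens_per_agent(transcript)
# -> {0: 7.0, 1: 3.0} with the sketch tokenizer above
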
def count_words(cls, examples):
    counts = defaultdict(int)
    for ex in examples:
        for event in ex.events:
            if event.action == 'message':
                tokens = tokenize(event.data)
                for token in tokens:
                    counts[token] += 1
    return counts

def get_price_trend(cls, price_tracker, chat, agent=None):

    def _normalize_price(seen_price):
        return (float(seller_target) - float(seen_price)) / (float(seller_target) - float(buyer_target))

    scenario = NegotiationScenario.from_dict(None, chat['scenario'])
    kbs = scenario.kbs
    roles = {kbs[0].facts['personal']['Role']: 0,
             kbs[1].facts['personal']['Role']: 1}
    buyer_target = kbs[roles[utils.BUYER]].facts['personal']['Target']
    seller_target = kbs[roles[utils.SELLER]].facts['personal']['Target']
    prices = []
    for e in chat['events']:
        if e['action'] == 'message':
            if agent is not None and e['agent'] != agent:
                continue
            raw_tokens = tokenize(e['data'])
            # Link entities
            linked_tokens = price_tracker.link_entity(raw_tokens, kb=kbs[e['agent']])
            for token in linked_tokens:
                if isinstance(token, Entity):
                    try:
                        replaced = PriceScaler.unscale_price(kbs[e['agent']], token)
                    except OverflowError:
                        print("Raw tokens:", raw_tokens)
                        print("Overflow error: {:s}".format(token))
                        print(kbs[e['agent']].facts)
                        print("-------")
                        continue
                    norm_price = _normalize_price(replaced.canonical.value)
                    # If the number is greater than the list price or significantly
                    # lower than the buyer's target, it's probably not a price
                    if 0. <= norm_price <= 2.:
                        prices.append(norm_price)
        elif e['action'] == 'offer':
            norm_price = _normalize_price(e['data']['price'])
            if 0. <= norm_price <= 2.:
                prices.append(norm_price)
    return prices

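# Worked example of _normalize_price: with seller_target=100 and buyer_target=70,
# a mentioned price of 85 maps to (100 - 85) / (100 - 70) = 0.5. The scale puts
# the seller's target at 0 and the buyer's target at 1, so mentions outside
# [0, 2] are discarded above as probably not being prices.
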
def start_inference(question):
    global inference_helper, inference_object

    # Start inference; set the global tuple with model, flags and hparams
    inference_object = do_start_inference(out_dir, hparams)

    # The first inference() call invokes this method; now that everything is
    # running, replace inference() with the actual function call
    inference_helper = lambda question: do_inference(tokenize(question), *inference_object)

    # Rerun the inference() call
    return inference_helper(question)

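# Design note: start_inference lazily builds the model on the first call and
# then rebinds inference_helper so later calls skip setup. A generic sketch of
# the same self-replacing-function pattern (all names here are hypothetical):
def _expensive_setup():
    return {'model': 'loaded'}   # stands in for do_start_inference()

def helper(x):
    global helper
    state = _expensive_setup()            # runs exactly once
    helper = lambda x: ('result', state)  # later calls bypass setup entirely
    return helper(x)
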
def __init__(self, agent, kb, uuid, model='seq2seq'): """ Dialogue data that is needed by the model. :param agent: Index of the agent that will be the buyer in the scenario :param kb: Knowledge base for the current agent in the current scenario :param uuid: Scenario ID :param model: Model Type. Can be seq2seq for the word model, or lf2lf for the coarse dialogue act model Note: lf2lf stands for logical form to logical form """ self.uuid = uuid self.agent = agent self.kb = kb self.model = model self.agent_to_role = self.get_role_mapping(agent, kb) # KB context # NOTE: context_to_int will change category, title, description to integers self.category_str = kb.category self.category = kb.category self.title = tokenize( re.sub(r'[^\w0-9]', ' ', kb.facts['item']['Title'])) self.description = tokenize( re.sub(r'[^\w0-9]', ' ', ' '.join(kb.facts['item']['Description']))) # token_turns: tokens and entitys (output of entity linking) self.token_turns = [] # parsed logical forms self.lfs = [] # turns: input tokens of encoder, decoder input and target, later converted to integers self.turns = [[], [], []] # entities: has the same structure as turns, non-entity tokens are None self.entities = [] self.agents = [] self.roles = [] self.is_int = False # Whether we've converted it to integers self.num_context = None
def get_avg_tokens_per_agent(transcript):
    tokens = {0: 0., 1: 0.}
    utterances = {0: 0., 1: 0.}
    for event in transcript["events"]:
        if event["action"] == "message":
            msg_tokens = tokenize(event["data"])
            tokens[event["agent"]] += len(msg_tokens)
            utterances[event["agent"]] += 1
    if utterances[0] != 0:
        tokens[0] /= utterances[0]
    if utterances[1] != 0:
        tokens[1] /= utterances[1]
    return tokens

def skip_example(cls, example): """ Skip all examples that do not have enough tokens or turns to be a good example :return: True if both agents speak less then 40 tokens of if the dialogue has less than two turns """ tokens = {0: 0, 1: 0} turns = {0: 0, 1: 0} for event in example.events: if event.action == "message": msg_tokens = tokenize(event.data) tokens[event.agent] += len(msg_tokens) turns[event.agent] += 1 if tokens[0] < 40 and tokens[1] < 40: return True if turns[0] < 2 or turns[1] < 2: return True return False
def get_speech_acts(self, ex):
    stats = {0: [], 1: []}
    kbs = ex.kbs
    for e in ex.events:
        if e.action != 'message':
            continue
        sentences = self.split_turn(e.data.lower())
        for s in sentences:
            tokens = tokenize(s)
            linked_tokens = self.price_tracker.link_entity(tokens, kb=kbs[e.agent])
            act = SpeechActAnalyzer.get_speech_act(s, linked_tokens)
            stats[e.agent].append(act)
    return stats

def parse_message(self, event, dialogue_state):
    tokens = self.lexicon.link_entity(tokenize(event.data), kb=self.kb, scale=False)
    template = self.extract_template(tokens, dialogue_state)
    utterance = Utterance(raw_text=event.data, tokens=tokens)
    tokens_with_parsed_price = self.parse_prices(tokens, dialogue_state)
    intent = self.classify_intent(utterance, tokens_with_parsed_price, dialogue_state)
    proposed_price = self.get_proposed_price(tokens_with_parsed_price, dialogue_state)
    utterance.lf = LF(intent, price=proposed_price)
    utterance.template = template
    return utterance

def parser_stats(self, parsed_dialogues, agent=None):
    stats = {}
    non_entity_vocab = set()
    ents = set()
    stats['intents'] = defaultdict(int)
    intent_utterances = defaultdict(list)

    for dialogue in parsed_dialogues:
        for utterance in dialogue:
            if agent and utterance.agent != agent:
                continue
            if utterance.tokens is not None:
                tokens = [x.canonical.type if is_entity(x) else x
                          for x in utterance.tokens]
                e = [x.surface for x in utterance.tokens if is_entity(x)]
                ents.update(e)
                non_entity_vocab.update(tokens)
            if utterance.lf and utterance.lf.intent != '<start>':
                stats['intents'][utterance.lf.intent] += 1
                if utterance.text is not None:
                    intent_utterances[utterance.lf.intent].append(tokenize(utterance.text))

    stats['non_entity_vocab_size'] = len(non_entity_vocab)
    stats['intent_corpus_perplexity'] = self.intent_sequence_perplexity(intent_utterances)
    self.print_stats(stats, 'parser stats')
    return stats

def process_event(self, e, kb):
    '''
    Tokenize, link entities.
    '''
    from cocoa.core.event import Event

    # Process a semi-event: a list whose second element, if present,
    # is a raw price to be scaled
    if not isinstance(e, Event):
        if len(e) < 2 or e[1] is None:
            return e
        else:
            e[1] = PriceScaler._scale_price(kb, e[1])
            return e

    if e.action == 'message':
        # Lowercase, tokenize, link entities
        entity_tokens = self.lexicon.link_entity(tokenize(e.data), kb=kb,
                                                 scale=True, price_clip=4.)
        return entity_tokens if entity_tokens else None
    elif e.action == 'offer':
        data = e.data['price']
        if data is None:
            return None
        price = PriceScaler._scale_price(kb, data)
        return [markers.OFFER, self.price_to_entity(price)]
    elif e.action == 'quit':
        return [markers.QUIT]
    elif e.action == 'accept':
        return [markers.ACCEPT]
    elif e.action == 'reject':
        return [markers.REJECT]
    else:
        raise ValueError('Unknown event action.')

def _get_price_mentions(self, chat, agent=None):
    scenario = NegotiationScenario.from_dict(None, chat['scenario'])
    kbs = scenario.kbs
    prices = 0
    for e in chat['events']:
        if agent is not None and e['agent'] != agent:
            continue
        if e['action'] == 'message':
            raw_tokens = tokenize(e['data'])
            # Link entities
            linked_tokens = self.price_tracker.link_entity(raw_tokens, kb=kbs[e['agent']])
            for token in linked_tokens:
                if isinstance(token, Entity) and token.canonical.type == 'price':
                    prices += 1
    return prices

def parse_message(self, event, dialogue_state):
    tokens = self.lexicon.link_entity(
        tokenize(event.data), kb=self.kb,
        mentioned_entities=dialogue_state.mentioned_entities, known_kb=False)
    utterance = Utterance(raw_text=event.data, tokens=tokens)
    intent = self.classify_intent(utterance)

    # Split entities into mentioned vs negated (preceded by a negation word)
    exclude_entities = []
    entities = []
    for i, token in enumerate(tokens):
        if is_entity(token):
            if i > 0 and tokens[i - 1] in self.neg_words:
                exclude_entities.append(token.canonical)
            else:
                entities.append(token.canonical)

    if len(entities) == 0 and len(exclude_entities) > 0:
        intent = 'negative'

    signature = ''
    if self.is_negative(utterance) and intent == 'inform':
        utterance.ambiguous_template = True
    elif entities:
        signature = self.signature(entities)
    elif exclude_entities:
        signature = self.signature(exclude_entities)

    if intent == 'negative' and not exclude_entities:
        exclude_entities = dialogue_state.my_entities

    lf = LF(intent, entities=entities, exclude_entities=exclude_entities,
            signature=signature)
    utterance.lf = lf
    utterance.template = self.extract_template(tokens, dialogue_state)
    return utterance

def example_stats(self, examples, agent=None):
    stats = {}
    stats['num_dialogues'] = len(examples)
    stats['num_turns_per_dialogue'] = np.mean([len(e.events) for e in examples])
    utterances = [tokenize(e.data)
                  for example in examples
                  for e in example.events
                  if e.action == 'message' and
                  (not agent or example.agents[e.agent] == agent)]
    stats['num_tokens_per_turn'] = np.mean([len(u) for u in utterances])
    vocab = set()
    for u in utterances:
        vocab.update(u)
    stats['vocab_size'] = len(vocab)
    global all_vocab
    all_vocab = vocab
    stats['corpus_perplexity'] = self.sequence_perplexity(utterances)
    self.print_stats(stats, 'dataset stats')
    return stats

def inference_internal(self, question):
    answers = self.do_inference(tokenize(question))
    answers = detokenize(answers)
    answers = replace_in_answers(answers, 'answers')
    answers_rate = score_answers(answers, 'answers')
    return (answers, answers_rate)

    ['no.AA', 'no. AA'],
    ['Mr. Daniel', 'Mr. Daniel'],
    ['mr. Daniel', 'mr. Daniel'],
    ['Mr.Daniel', 'Mr. Daniel'],
    ['mr.Daniel', 'mr. Daniel'],
    ['mrr. Daniel', 'mrr . Daniel'],
    ['test No.25 test No. 25 test mr. Daniel test',
     'test No. 2 5 test No. 2 5 test mr. Daniel test'],
    ['https://www.youtube.com/watch?v=r8b0PWR1qxI',
     'https : / / www.youtube.com / watch ? v = r 8 b 0 PWR 1 qxI'],
    ['www.example.com', 'www.example.com'],
    [':)', ': )'],
    ['word...', 'word ...'],
    ['360,678', '3 6 0 , 6 7 8'],
    ['360.678', '3 6 0 . 6 7 8'],
    ['Test phrase. Test phrase.', 'Test phrase . Test phrase .'],
    ['<unk>', ''],
    ["you're", "you ' re"],
    ["you 're", "you ' re"],
    ["you' re", "you ' re"],
    ["1950's", "1 9 5 0 ' s"],
    ['`', "'"],
    ["''", '"'],
    [':/', ': /'],
    ['^^^^^', '^ ^ ^ ^ ^'],
]

init()

for test in tests:
    tokenized = tokenize(test[0])
    print('[{}] {} -> {}{}'.format(
        Fore.GREEN + 'PASS' + Fore.RESET if tokenized == test[1] else Fore.RED + 'FAIL' + Fore.RESET,
        test[0], test[1],
        '' if tokenized == test[1] else ' Result: {}'.format(tokenized)))

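# Each entry above is an [input, expected] pair, where `expected` is the
# space-joined token string the tokenizer should produce. The cases document
# the tokenizer's rules: digits are split into single characters, URLs are
# broken on punctuation (while bare domains stay intact), title abbreviations
# like 'Mr.' are normalized, and '<unk>' is dropped. Adding a rule means
# adding another pair here.
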
    return command


print("\nHello, Terra Home here, your home assistant. How may I help you?")
engine.say("Hello, Terra Home here, your home assistant. How may I help you?")
engine.runAndWait()

colorama.init()
listen = True
intents = ['song', 'email', 'weather', 'foursquare_explore', 'joke', 'bye']

# QAs
while listen:
    question = myCommand()
    ques_tokens = tokenize(question)
    resp = requests.get(url + question,
                        headers={'Authorization': f'Bearer {ACCESS_TOKEN}'})
    intent_resp = json.loads(resp.text)
    intent_resp_str = resp.text
    if 'intents' in intent_resp['entities']:
        intent = intent_resp['intents'][0]['name']
    else:
        intent = 'chat'
    if intent not in intents:
        intent = 'chat'

def from_text(cls, raw_text, price_tracker, kb):
    tokens = price_tracker.link_entity(tokenize(raw_text), kb=kb, scale=False)
    return cls(raw_text, tokens)

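# Usage sketch (objects are hypothetical): build an Utterance directly from raw
# text, letting the price tracker link price mentions against the agent's KB:
# utterance = Utterance.from_text('would you take $50?', price_tracker, kb)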