Example #1
    def setUp(self):
        self.inst = raw_txt_to_inst("balhd       dfds dkkdf ldf\n" +
                                    "experiment  this is      a\n" +
                                    "this        is    a      test")

        self.gloss_tokens = tokenize_string("experiment  this is      a")
        self.trans_tokens = tokenize_string("this        is    a      test")

        self.a = Alignment([(2,3),(1,2),(3,4)])
        self.a2 = Alignment([(2,3),(1,2),(3,4),(4,1)])
Example #2
File: parsing.py  Project: rgeorgi/intent
def create_words_tier_from_string(string):
    # Tokenize the string on whitespace and build a words tier with one Item per token.
    tokens = tokenize_string(string, tokenizer=whitespace_tokenizer)
    wt = Tier(type=WORDS_TYPE)
    for token in tokens:
        i = Item(id=ask_item_id(wt), text=token.value())
        wt.append(i)
    return wt
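
A minimal usage sketch for the tier builder above. The call and the item access are assumptions based on the snippet (a Tier is assumed to be iterable over its Items, and each Item to keep the text it was created with):

    # Hypothetical input; tier/item behavior is assumed, not confirmed by the project.
    wt = create_words_tier_from_string("el perro corre")
    print([item.text for item in wt])   # expected: ['el', 'perro', 'corre']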
Example #3
    def add_word_tag(self, word, tag, prev_word=None, next_word=None, count=1):
        for subword in tokenize_string(word, tokenizer=morpheme_tokenizer):
            subword = subword.seq
            if tag not in self.sw_dict[subword].keys():
                self.sw_dict[subword][tag] = {'contexts': [(prev_word, next_word)], 'count': count}
            else:
                self.sw_dict[subword][tag]['contexts'].append((prev_word, next_word))
                self.sw_dict[subword][tag]['count'] += count
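
The unconditional self.sw_dict[subword][tag] lookup above only works if an unseen subword maps to an empty dict automatically, which suggests sw_dict is a defaultdict. A minimal sketch of the assumed initialization; the surrounding class is hypothetical and only the attribute name comes from the snippet:

    from collections import defaultdict

    class SubwordTagModel:                 # hypothetical host class for add_word_tag
        def __init__(self):
            # subword -> tag -> {'contexts': [(prev, next), ...], 'count': int}
            self.sw_dict = defaultdict(dict)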
Example #4
def remove_tags(source_path, target_path):
    # Read the tagged source file line by line, tokenize with the tag tokenizer,
    # and write the resulting text to the target file.
    source_f = open(source_path, 'r', encoding='utf-8')
    target_f = open(target_path, 'w', encoding='utf-8')

    for line in source_f:
        tokens = tokenize_string(line, tokenizer=tag_tokenizer)
        target_f.write(tokens.text()+'\n')

    source_f.close()
    target_f.close()
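
A hedged usage sketch for remove_tags, assuming tag_tokenizer handles slash-tagged input of the form word/TAG and that .text() drops the tags when rejoining; the paths and the tag format are illustrative only:

    remove_tags('corpus.tagged.txt', 'corpus.plain.txt')
    # e.g. a line 'The/DT dog/NN runs/VBZ' would come out as 'The dog runs'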
Example #5
    def clean_new_trans_test(self):
        orig = '"I don\'t understand any of it; I don\'t understand it at all"'
        expected = " I don\'t understand any of it; I don\'t understand it at all "
        result = clean_trans_string(orig)

        self.assertEqual(result, expected)

        tokenized          = tokenize_string(result, tokenizer=sentence_tokenizer).text()
        tokenized_expected = "I don't understand any of it ; I don't understand it at all"
        self.assertEqual(tokenized, tokenized_expected)
Example #6
    def from_giza_lines(cls, tgt, aln):
        """
        Return the target-to-source alignment from the target and aln lines
        of giza.
        """
        # Start by getting the target tokens from the provided target line
        tgt_tokens = tokenize_string(tgt, whitespace_tokenizer)

        # next, read the alignments from the aln line.
        a = Alignment.from_giza(aln)

        # Finally, the source tokens are also on the aln line.
        alignments = re.findall(r'(\S+) \(\{(.*?)\}\)', aln)

        # Get the src tokens, skipping the initial NULL entry...
        src_tokens = [pair[0] for pair in alignments[1:]]

        # And create the aln sent.
        aln_sent = cls(src_tokens, tgt_tokens, a)
        return aln_sent
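
A hedged usage sketch for from_giza_lines, using the standard GIZA++ A3-style target/alignment line pair that the regex above expects; the class name AlignedSent is assumed from Example #8:

    tgt = "the dog runs"
    aln = "NULL ({ }) le ({ 1 }) chien ({ 2 }) court ({ 3 })"
    aln_sent = AlignedSent.from_giza_lines(tgt, aln)   # target-to-source alignment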
Example #7
def stanford_stdout_handler(output, queue):
    # Handle a line of Stanford tagger stdout: tokenize the tagged output and buffer it on the queue.
    queue.append(tokenize_string(output, tokenizer=tag_tokenizer))
Example #8
    def runTest(self):
        s1 = tokenize_string('This is a test sentence')
        s2 = tokenize_string('test sentence this is')
        a = Alignment([(1,3),(2,4),(4,1),(5,2)])

        a_sent = AlignedSent(s1, s2, a)