def word_segment(sent):
    '''
    Args:
      sent: A string. A sentence.

    Returns:
      A list of words.
    '''
    global lcode
    if lcode in ['ko']:  # Korean: Kkma morphological analyzer
        words = [word for word, _ in kkma.pos(sent)]
    elif lcode in ['ja']:  # Japanese: MeCab
        words = mecab.parse(sent.encode('utf8')).split()
    elif lcode in ['th']:  # Thai: pythai
        words = pythai.split(sent)
    elif lcode in ['vi']:  # Vietnamese: ViTokenizer
        words = ViTokenizer.tokenize(sent).split()
    elif lcode in ['zh']:  # Chinese: jieba
        words = list(jieba.cut(sent, cut_all=False))
    # elif lcode in ['ar']:
    #     words = segmenter.segment(sent).split()
    else:  # Mostly European languages: whitespace is a reliable delimiter
        words = sent.split()
    return words
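# A minimal usage sketch, assuming the tokenizer objects above are
# initialised and that the caller sets the module-level `lcode` global
# before calling; the segmented Thai output shown is illustrative.
if __name__ == '__main__':
    lcode = 'th'
    print(word_segment(u'ฉันกินข้าว'))   # e.g. [u'ฉัน', u'กิน', u'ข้าว']
    lcode = 'en'
    print(word_segment(u'I eat rice'))   # ['I', 'eat', 'rice']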
def analyze_sentence(sentence, is_thai, dictionary):
    # Build an <se> (sentence) element with one <w> child per token; each
    # dictionary analysis becomes an <ana> child carrying lemma, POS,
    # translation and transliteration attributes.
    se = etree.Element(u'se')
    if is_thai:
        tokens = pythai.split(sentence)
        for token in tokens:
            if token:
                word = etree.Element(u'w')
                if token in dictionary:
                    for analysis_number in dictionary[token]:
                        analysis = dictionary[token][analysis_number]
                        ana = etree.Element(u'ana')
                        ana.attrib[u'lex'] = token
                        ana.attrib[u'pos'] = u','.join(analysis[1])
                        ana.attrib[u'trans'] = analysis[0]
                        ana.attrib[u'translit'] = analysis[2]
                        word.append(ana)
                word.text = token
                se.append(word)
    else:
        # Non-Thai text is normalised and kept as a single token with an
        # empty analysis.
        sentence = u' '.join(sentence.split())
        sentence = sentence.replace(u'\t', u'')
        sentence = sentence.replace(u'\r\n', u'')
        sentence = sentence.replace(u'\n', u'')
        if sentence:
            word = etree.Element(u'w')
            ana = etree.Element(u'ana')
            ana.attrib[u'lex'] = u''
            ana.attrib[u'pos'] = u''
            ana.attrib[u'trans'] = u''
            ana.attrib[u'translit'] = u''
            word.append(ana)
            word.text = sentence
            se.append(word)
    return se
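# A minimal usage sketch, assuming etree comes from lxml and that, as the
# attribute assignments above imply, dictionary entries map
# token -> {analysis_number: (translation, pos_tags, transliteration)}.
# The dictionary entry below is hypothetical.
from lxml import etree

dictionary = {u'ฉัน': {0: (u'I', [u'PRON'], u'chan')}}
se = analyze_sentence(u'ฉันกินข้าว', is_thai=True, dictionary=dictionary)
print(etree.tostring(se, encoding='unicode'))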
def tag_text(text, dictionary):
    # Wrap each whitespace-separated chunk in an <se> element and tag every
    # Thai token inside it; tag_word and create_xml are defined elsewhere.
    result = [u"<body>"]
    sents = text.split()
    for i in sents:
        result.append(u"<se>")
        for j in pythai.split(i):
            result.append(tag_word(j, dictionary))
        result.append(u"</se>")
    result.append(u"</body>")
    return create_xml(result)
def test_split(self):
    for sentence in self.test_sentences:
        six.print_(sentence.split, ' '.join(pythai.split(sentence.sentence)))
        self.assertEqual(' '.join(pythai.split(sentence.sentence)),
                         sentence.split)
def test_split(self):
    for sentence in self.test_sentences:
        print(sentence.split, ' '.join(pythai.split(sentence.sentence)))
        self.assertEqual(' '.join(pythai.split(sentence.sentence)),
                         sentence.split)
def pos(string):
    # Despite the name, this only tokenizes: pythai.split returns words,
    # not POS tags.
    tokens = pythai.split(string)
    return tokens
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import pythai

print(pythai.split(u"การที่ได้ต้องแสดงว่างานดี"))
print(pythai.split(u"ฉันกินข้าว"))
print(pythai.split(u"ฉันwantกินseeข้าว"))
import pythai

print(pythai.split(u"การที่ได้ต้องแสดงว่างานดี"))
def textToSegmentedList(sentence):
    # change later
    # return sentence.split(u' ')
    return pythai.split(sentence)
def pythai_split(u, limit=1):
    """Split Thai text into words using PyThai.

    Note: the `limit` parameter is currently accepted but unused.
    """
    return pythai.split(u)
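# Usage sketch: because `limit` is ignored by the wrapper, both calls
# below return the same token list.
print(pythai_split(u'ฉันกินข้าว'))
print(pythai_split(u'ฉันกินข้าว', limit=5))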
def token_iterator(sentence):
    tokens = pythai.split(sentence)
    for token in tokens:
        yield token
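# Usage sketch: iterate over the generator to consume Thai tokens one at
# a time.
for token in token_iterator(u'การที่ได้ต้องแสดงว่างานดี'):
    print(token)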