def tag(text1):
    """Part-of-speech tag a sequence of words with artagger.

    :param text1: iterable of word strings; the words are joined with single
        spaces before being handed to ``Tagger().tag``.
    :return: list of ``(word, tag)`` tuples, one per item returned by the tagger.
    :raises SystemExit: if artagger still cannot be imported after the
        fallback ``install_package`` call.
    """
    try:
        from artagger import Tagger
    except ImportError:
        # artagger is missing: hand its archive URL to pythainlp's
        # install_package helper (presumably a pip install), then retry.
        from pythainlp.tools import install_package
        install_package(
            'https://github.com/wannaphongcom/artagger/archive/master.zip'
        )
        try:
            from artagger import Tagger
        except ImportError:
            # Imported locally so this function does not depend on a
            # module-level ``import sys``.
            import sys
            # Passing the message to sys.exit() writes it to stderr and exits
            # with a non-zero status, so the failure is not reported as success.
            sys.exit(
                "Error ! using 'pip install https://github.com/wannaphongcom/artagger/archive/master.zip'"
            )
    tagged = Tagger().tag(' '.join(text1))
    return [(item.word, item.tag) for item in tagged]
Code by https://github.com/cstorm125/thai2vec/tree/master/notebook ''' from __future__ import absolute_import,unicode_literals import os import sys import re import torch #numpy and fastai try: import numpy as np from fastai.text import * import dill as pickle except ImportError: from pythainlp.tools import install_package install_package('fastai') install_package('numpy') try: import numpy as np from fastai.text import * import dill as pickle except ImportError: print("Error installing using 'pip install fastai numpy dill'") sys.exit(0) #import torch try: import torch except ImportError: print('PyTorch required. See https://pytorch.org/.')
# -*- coding: utf-8 -*- ''' Code by https://github.com/cstorm125/thai2vec/tree/master/notebook ''' from __future__ import absolute_import,unicode_literals import os import sys from collections import defaultdict #numpy and dill try: import numpy as np import dill as pickle except ImportError: from pythainlp.tools import install_package install_package('numpy') install_package('dill') try: import numpy as np import dill as pickle except ImportError: print("Error installing using 'pip install numpy dill'") sys.exit(0) #import torch try: import torch except ImportError: print('PyTorch required. See https://pytorch.org/.') import torch from torch.autograd import Variable
# -*- coding: utf-8 -*- from __future__ import absolute_import, unicode_literals import sys try: import deepcut except ImportError: '''ในกรณีที่ยังไม่ติดตั้ง deepcut ในระบบ''' from pythainlp.tools import install_package install_package("deepcut") try: import deepcut except ImportError: raise Exception("ImportError ! using pip install deepcut") def segment(text): return deepcut.tokenize(text)
# -*- coding: utf-8 -*- from __future__ import absolute_import,print_function,unicode_literals import sys import re try: import icu except ImportError: from pythainlp.tools import install_package install_package('pyicu') try: import icu except ImportError: sys.exit('Error ! using pip install pyicu') def gen_words(text): bd = icu.BreakIterator.createWordInstance(icu.Locale("th")) bd.setText(text) p = bd.first() for q in bd: yield text[p:q] p = q def segment(text): text = re.sub("([^\u0E00-\u0E7F\n ]+)"," \\1 ",text) return list(gen_words(text)) if __name__ == "__main__": print(segment('ทดสอบระบบตัดคำด้วยไอซียู')) print(segment('ผมชอบพูดไทยคำ English')) print(segment('ผมชอบพูดไทยคำEnglishคำ')) print(segment("""ผมชอบพูดไทยคำEnglish540 บาท""")) print(segment('ประหยัด ไฟเบอห้า'))
# -*- coding: utf-8 -*- from __future__ import absolute_import, unicode_literals, print_function import sys import six if six.PY2: print("Thai sentiment in pythainlp. Not support Python 2") sys.exit(0) try: from wordcut import Wordcut except ImportError: ''' ในกรณีที่ยังไม่ติดตั้ง wordcutpy ในระบบ ''' from pythainlp.tools import install_package install_package('wordcutpy') try: from wordcut import Wordcut except ImportError: sys.exit('Error ! using $ pip install wordcutpy') def segment(text, data=None): if not data: wordcut = Wordcut.bigthai() else: word_list = list(set(data)) wordcut = Wordcut(word_list) return wordcut.tokenize(text)
# -*- coding: utf-8 -*- from __future__ import absolute_import, unicode_literals import sys try: import icu except ImportError: from pythainlp.tools import install_package install_package('pyicu') try: import icu except ImportError: sys.exit('Error ! using pip install pyicu') # ถอดเสียงภาษาไทยเป็น Latin def romanization(data): """เป็นคำสั่ง ถอดเสียงภาษาไทยเป็น Latin รับค่า ''str'' ข้อความ คืนค่าเป็น ''str'' ข้อความ Latin""" thai2latin = icu.Transliterator.createInstance('Thai-Latin') return thai2latin.transliterate(data)
# -*- coding: utf-8 -*- from __future__ import absolute_import,unicode_literals,print_function import sys import six if six.PY2: print("Thai sentiment in pythainlp. Not support Python 2") sys.exit(0) try: from wordcut import Wordcut except ImportError: ''' ในกรณีที่ยังไม่ติดตั้ง wordcutpy ในระบบ ''' from pythainlp.tools import install_package install_package('wordcutpy') try: from wordcut import Wordcut except ImportError: sys.exit('Error ! using $ pip install wordcutpy') def segment(text, data=None): if not data: wordcut = Wordcut.bigthai() else: word_list = list(set(data)) wordcut = Wordcut(word_list) return wordcut.tokenize(text)
# -*- coding: utf-8 -*- from __future__ import absolute_import,unicode_literals # NLP import re from pythainlp.tokenize import word_tokenize from pythainlp.tag import pos_tag from pythainlp.corpus import stopwords thaicut="newmm" # ตัวตัดคำ # CRF try: import sklearn_crfsuite except ImportError: from pythainlp.tools import install_package install_package('sklearn-crfsuite') import sklearn_crfsuite # FILE import glob import codecs from pythainlp.corpus import get_file,download stopwords = stopwords.words('thai') def isThai(chr): # เช็คว่าเป็น char ภาษาไทย cVal = ord(chr) if(cVal >= 3584 and cVal <= 3711): return True return False def isThaiWord(word): # เช็คว่าเป็นคำภาษาไทย t=True for i in word:
Code by https://github.com/cstorm125/thai2vec/tree/master/notebook ''' from __future__ import absolute_import, unicode_literals import os import sys import re import torch #numpy and fastai try: import numpy as np from fastai.text import * import dill as pickle except ImportError: from pythainlp.tools import install_package install_package('fastai') install_package('numpy') try: import numpy as np from fastai.text import * import dill as pickle except ImportError: print("Error installing using 'pip install fastai numpy dill'") sys.exit(0) #import torch try: import torch except ImportError: print('PyTorch required. See https://pytorch.org/.')
# -*- coding: utf-8 -*- from __future__ import print_function try: import numpy as np import keras except ImportError: from pythainlp.tools import install_package install_package('keras') install_package('numpy') from pythainlp.corpus import get_file,download from keras.models import Model, load_model from keras.layers import Input import numpy as np class thai2rom: def __init__(self): ''' Thai2Rom ''' self.batch_size = 64 self.epochs = 100 self.latent_dim = 256 self.num_samples = 648241 self.data_path = get_file('thai2rom-dataset') if self.data_path==None: download('thai2rom-dataset') self.data_path = get_file('thai2rom-dataset') self.input_texts = [] self.target_texts = []
# -*- coding: utf-8 -*- from __future__ import absolute_import,unicode_literals import sys try: import deepcut except ImportError: '''ในกรณีที่ยังไม่ติดตั้ง deepcut ในระบบ''' from pythainlp.tools import install_package install_package('deepcut') try: import deepcut except ImportError: sys.exit('Error ! using pip install deepcut') def segment(text): return deepcut.tokenize(text)
# -*- coding: utf-8 -*- ''' Code by https://github.com/cstorm125/thai2vec/blob/master/notebooks/examples.ipynb ''' from __future__ import absolute_import, unicode_literals import six import sys if six.PY2: print("Thai sentiment in pythainlp. Not support python 2.7") sys.exit(0) try: from gensim.models import KeyedVectors import numpy as np except ImportError: from pythainlp.tools import install_package install_package('gensim') install_package('numpy') try: from gensim.models import KeyedVectors import numpy as np except ImportError: print("Error ! using 'pip install gensim numpy'") sys.exit(0) from pythainlp.tokenize import word_tokenize from pythainlp.corpus import get_file from pythainlp.corpus import download as download_data import os def download(): path = get_file('thai2vec02')
# -*- coding: utf-8 -*- from __future__ import absolute_import, unicode_literals import sys try: import deepcut except ImportError: '''ในกรณีที่ยังไม่ติดตั้ง deepcut ในระบบ''' from pythainlp.tools import install_package install_package('deepcut') try: import deepcut except ImportError: sys.exit('Error ! using pip install deepcut') def segment(text): return deepcut.tokenize(text)
import re import sys from pythainlp.corpus import download, get_file from pythainlp.tokenize import word_tokenize # numpy and fastai try: import numpy as np from fastai.text import * import dill as pickle except ImportError: from pythainlp.tools import install_package install_package("fastai") install_package("numpy") try: import numpy as np from fastai.text import * import dill as pickle except ImportError: print("Error installing using 'pip install fastai numpy dill'") sys.exit(0) # import torch try: import torch except ImportError: print("PyTorch required. See https://pytorch.org/.")
# -*- coding: utf-8 -*- from __future__ import absolute_import,unicode_literals import sys try: from pylexto import LexTo except ImportError: from pythainlp.tools import install_package install_package('https://github.com/wannaphongcom/pylexto/archive/master.zip') try: from pylexto import LexTo except ImportError: sys.exit('Error ! using pip install https://github.com/wannaphongcom/pylexto/archive/master.zip') def segment(text,full=False): lexto = LexTo() words, types = lexto.tokenize(text) if full==True: return (words,types) else: return words
# -*- coding: utf-8 -*- ''' Code by https://github.com/cstorm125/thai2vec/blob/master/notebooks/examples.ipynb ''' from __future__ import absolute_import,unicode_literals import six import sys if six.PY2: print("Thai sentiment in pythainlp. Not support python 2.7") sys.exit(0) try: from gensim.models import KeyedVectors import numpy as np except ImportError: from pythainlp.tools import install_package install_package('gensim') install_package('numpy') try: from gensim.models import KeyedVectors import numpy as np except ImportError: print("Error ! using 'pip install gensim numpy'") sys.exit(0) from pythainlp.tokenize import word_tokenize from pythainlp.corpus import get_file from pythainlp.corpus import download as download_data import os def download(): path = get_file('thai2vec02') if path==None:
# -*- coding: utf-8 -*- from __future__ import absolute_import, unicode_literals import sys try: from pylexto import LexTo except ImportError: from pythainlp.tools import install_package install_package( 'https://github.com/wannaphongcom/pylexto/archive/master.zip') try: from pylexto import LexTo except ImportError: sys.exit( 'Error ! using pip install https://github.com/wannaphongcom/pylexto/archive/master.zip' ) def segment(text, full=False): lexto = LexTo() words, types = lexto.tokenize(text) if full == True: return (words, types) else: return words
# -*- coding: utf-8 -*- from __future__ import absolute_import, unicode_literals, print_function import sys import six try: from wordcut import Wordcut except ImportError: ''' ในกรณีที่ยังไม่ติดตั้ง wordcutpy ในระบบ ''' from pythainlp.tools import install_package install_package("wordcutpy") try: from wordcut import Wordcut except ImportError: raise Exception("ImportError ! using $ pip install wordcutpy") def segment(text, data=None): if not data: wordcut = Wordcut.bigthai() else: word_list = list(set(data)) wordcut = Wordcut(word_list) return wordcut.tokenize(text)
# -*- coding: utf-8 -*- ''' Code by https://github.com/cstorm125/thai2vec/tree/master/notebook ''' from __future__ import absolute_import, unicode_literals import os import sys from collections import defaultdict #numpy and dill try: import numpy as np import dill as pickle except ImportError: from pythainlp.tools import install_package install_package('numpy') install_package('dill') try: import numpy as np import dill as pickle except ImportError: print("Error installing using 'pip install numpy dill'") sys.exit(0) #import torch try: import torch except ImportError: print('PyTorch required. See https://pytorch.org/.') import torch from torch.autograd import Variable
# -*- coding: utf-8 -*- from __future__ import print_function try: import numpy as np import keras except ImportError: from pythainlp.tools import install_package install_package('keras') install_package('numpy') from pythainlp.corpus import get_file, download from keras.models import Model, load_model from keras.layers import Input import numpy as np class thai2rom: def __init__(self): ''' Thai2Rom ''' self.batch_size = 64 self.epochs = 100 self.latent_dim = 256 self.num_samples = 648241 self.data_path = get_file('thai2rom-dataset') if self.data_path == None: download('thai2rom-dataset') self.data_path = get_file('thai2rom-dataset')