def __init__(self, model_dir):
    """Load the CKIP word-segmentation and POS-tagging models from *model_dir*.

    Emits a single progress line: the opening message is printed without a
    trailing newline so that 'done' completes it once loading finishes.
    """
    # NOTE(review): punctuation POS classes appear to share this suffix —
    # confirm against the CKIP tag set before relying on it elsewhere.
    self._pos_punc_class_suffix = 'CATEGORY'
    print(f'[*] Creating CKIP tokenizer from {model_dir}...', end='', flush=True)
    self._ws = ckiptagger.WS(model_dir)
    self._pos = ckiptagger.POS(model_dir)
    print('done')
def _init(self):
    """Construct the CKIP WS core, attaching any user-supplied dictionaries.

    Lexicons set on the instance are compiled into ckiptagger dictionary
    objects and stored in ``self._opts`` under the keys the WS call expects.
    """
    import ckiptagger

    # Both lexicon kinds follow the same build-and-store pattern, so drive
    # them from one table instead of two parallel if-blocks.
    for lexicon, option_key in (
        (self._recommend_lexicons, 'recommend_dictionary'),
        (self._coerce_lexicons, 'coerce_dictionary'),
    ):
        if lexicon:
            self._opts[option_key] = ckiptagger.construct_dictionary(lexicon)
    self._core = ckiptagger.WS(_get_tagger_data(), disable_cuda=self._disable_cuda)
def load_model(self):
    """Ensure the model files are present locally, then build the segmenter.

    Returns a ``ckiptagger.WS`` instance backed by ``self.data_dir``;
    CUDA use is governed by ``self.disable_cuda``.
    """
    self.download_model_files()
    segmenter = ckiptagger.WS(self.data_dir, disable_cuda=self.disable_cuda)
    return segmenter
#!/usr/bin/python3 from requests import get from bs4 import BeautifulSoup as bs from urllib.parse import unquote from sklearn.feature_extraction.text import TfidfVectorizer import cgi, json, datetime, ckiptagger, pickle, lzma, sys, codecs field = cgi.FieldStorage() content, models, vocabs, corpus = [], {}, {}, {} code_to_name = dict( i.decode('utf-8').strip().split(',') for i in open('stock_codec.csv', 'rb')) sys.stdout = codecs.getwriter('utf-8')(sys.stdout.detach()) try: ws = ckiptagger.WS('./data', True) except Exception as e: ckiptagger.data_utils.download_data_url('./') ws = ckiptagger.WS('./data', True) try: src, stock = field.getvalue('src').split(','), field.getvalue( 'stock').split(',') #src, stock = ['ctee'], ['2330'] except: print( 'content-type:text/plain;charset:utf-8\n\nNo newspaper source or stock specified.' ) exit() try: