}], ["utf-8:count:null", "123Б測試", { "COUNT": 6 }], ["utf-8:count#blah:null", "123Б測試", { "BLAH": 6 }], ["utf-8:count#for=lala&for=cjk:null", "123Б測a試bc", { "COUNT": 2 }], ] passed = True for c, i, o in iotest: p = Bsdconv(c) if not p: print(Bsdconv.error()) print("Test failed at %s" % repr([c, i, o])) del p passed = False continue r = p.conv(i) if o != r: print("Test failed at %s" % repr([c, i, o])) print("expected(%d): %s" % (len(o), repr(o))) print("result(%d): %s" % (len(r), repr(r))) passed = False del p for c, d, i in countertest:
#!/usr/bin/env python #mkbonus.py src_list char_list phrase_list import sys import re from bsdconv import Bsdconv clist=open(sys.argv[2], "w") plist=open(sys.argv[3], "w") sc=Bsdconv("utf-8:score#with=cjk:null") bcv=Bsdconv("utf-8:insert#after=002c:bsdconv-keyword,bsdconv") bcv_zhtw=Bsdconv("utf-8:zhtw:insert#after=002c:bsdconv-keyword,bsdconv") sep=re.compile(r"\s+") f=open(sys.argv[1]) for l in f: l = l.strip() if l == "": continue if l.startswith("#"): clist.write(l+"\n") plist.write(l+"\n") a = sep.split(l) p = a[0] ln = len(p.decode("utf-8")) if ln > 1: bonus = 6 p = bcv_zhtw.conv(p).rstrip(",")
#!/usr/bin/env python
"""Feed the file named by argv[1] through bsdconv's score-train conversion.

The input is pushed through the "utf-8:score-train:null" conversion in
1024-byte reads. A temporary score file (created with Bsdconv.mktemp) is
attached via CTL_ATTACH_SCORE and "characters_list.txt" is attached via
CTL_ATTACH_OUTPUT_FILE before the conversion is initialised.
"""
import os
import sys

from bsdconv import Bsdconv

CHUNK_SIZE = 1024

# mktemp returns a pair; element 0 is what gets attached to the converter
# and element 1 is unlinked right away.
# NOTE(review): presumably (file handle, path), so the score file lives only
# as long as the handle is open - confirm against the bsdconv binding.
a = Bsdconv.mktemp("score.XXXXXX")
os.unlink(a[1])

clist = Bsdconv.fopen("characters_list.txt", "w+")

p = Bsdconv("utf-8:score-train:null")
if not p:
    print(Bsdconv.error())
    del p
    # NOTE(review): exits with the default status (0) even though the
    # converter could not be created; kept as in the original.
    sys.exit()

p.ctl(Bsdconv.CTL_ATTACH_SCORE, a[0], 0)
p.ctl(Bsdconv.CTL_ATTACH_OUTPUT_FILE, clist, 0)
p.init()

# `with` guarantees the input file is closed even if a conversion call raises.
with open(sys.argv[1]) as f:
    s = f.read(CHUNK_SIZE)
    while s:
        p.conv_chunk(s)
        s = f.read(CHUNK_SIZE)
    # The loop ends on an empty read; that empty chunk finalises the stream.
    p.conv_chunk_last(s)
#!/usr/bin/env python
# Build a converter from the conversion string in argv[1], run conv_file on
# argv[2] and argv[3], then print the converter and its counters.
# NOTE(review): argv[2]/argv[3] are presumably the input and output paths -
# confirm against the bsdconv binding.
import sys
from bsdconv import Bsdconv

converter = Bsdconv(sys.argv[1])
if converter:
    converter.conv_file(sys.argv[2], sys.argv[3])
    print(converter)
    print(converter.counter())
    del converter
else:
    # The conversion string was rejected: report why and stop.
    print(Bsdconv.error())
    del converter
    sys.exit()
# -*- coding: utf-8 -*-
# python nfkc_gen.py '⁰¹²³'|sort|uniq
#
# For every character of argv[1], print "<converted>\t<original>" when the
# "utf-8:nfkc:utf-8" conversion changes it; unchanged characters are skipped.
import sys
from bsdconv import Bsdconv

nfkc = Bsdconv("utf-8:nfkc:utf-8")

# Decode first so the loop walks characters, not bytes (Python 2 str).
for char in sys.argv[1].decode("utf-8"):
    encoded = char.encode("utf-8")
    converted = nfkc.conv(encoded)
    if encoded != converted:
        print("{}\t{}".format(converted, encoded))
class Crawler(object):
    """Telnet crawler that walks a BBS board and stores its articles.

    Bytes read from the telnet connection are converted with ``convert`` and
    fed into a pyte screen; the methods send key strokes and read the
    resulting screen contents.

    NOTE(review): the original indentation of this class was lost, so the
    block structure was reconstructed from the statement order. The key
    strings ('OBOC', 'OC', 'OB') may originally have contained raw escape
    bytes that are not visible here - confirm against the upstream file.
    """

    # Shared converter: telnet bytes -> UTF-8 text for the terminal emulator.
    convert = Bsdconv("ansi-control,byte:big5-defrag:byte,ansi-control|skip,big5:utf-8,bsdconv_raw")

    def __init__(self, host):
        """Connect to *host* on port 3456, log in and crawl 'NCTU-Teacher'.

        After entering the board the user is prompted for the first article
        number; every article from that number up to the board's last id is
        fetched and saved.
        """
        self.host = host
        self.delay = 0
        self.conn = Telnet(host, 3456)
        self.screen = pyte.Screen(80, 24)
        self.stream = pyte.Stream()
        self.screen.mode.discard(pyte.modes.LNM)
        self.stream.attach(self.screen)
        # Property access for its side effect: waits for the first data from
        # the server and feeds it into the screen.
        self.display
        self.login()
        self.enter_board('NCTU-Teacher')
        # SECURITY NOTE: on Python 2, input() evaluates what the user types.
        # int() makes the result an integer on both Python 2 and Python 3.
        first_id = int(input('n - ' + self.last_id + ': '))
        for i in range(first_id, int(self.last_id) + 1):
            self.get_article(i)

    @property
    def display(self):
        """Block until the server sends data, feed it to the screen and
        return the resulting screen shot."""
        s = self.conn.read_very_eager()
        while not s:
            s = self.conn.read_very_eager()
            time.sleep(self.delay)
        s = self.convert.conv(s)
        self.stream.feed(s.decode('utf-8'))
        return self.screen_shot

    @property
    def screen_shot(self):
        """Return the current screen lines joined by newlines, UTF-8 encoded."""
        return "\n".join(self.screen.display).encode("utf-8")

    def close(self):
        """Close the telnet connection."""
        self.conn.close()

    def send_enter(self, count=1):
        """Send a carriage return *count* times.

        Returns the screen shot when ``count == 1``; otherwise returns None.
        """
        shot = None
        for _ in range(count):
            shot = self.send('\r')
        return shot if count == 1 else None

    def send(self, s):
        """Write *s* to the connection and return the updated screen shot."""
        self.conn.write(s)
        return self.display

    def login(self):
        """Send the login name and answer the prompts that follow."""
        username = '******'
        self.conn.write(username + '\r')
        self.conn.write('\rYY\r')
        self.send_enter(2)

    def enter_board(self, board):
        """Enter *board*; store its name in ``self.board`` and the latest
        article id in ``self.last_id``.

        Raises:
            ValueError: if the line under the cursor does not start with an
                article number.
        """
        self.send('OBOC')
        self.send('s{}'.format(board))
        self.send_enter(2)
        line = self.screen.cursor.y
        match = re.search(r'(?P<last_id>^\d+) ', self.screen.display[line].strip())
        if match is None:
            raise ValueError(
                'no article id found on the cursor line of board %r' % (board,))
        # Take the named group so the id does not carry the trailing space
        # that the whole match includes.
        self.last_id = match.group('last_id')
        self.board = board

    def get_article(self, num=None):
        """Open article *num*, page through it and hand the collected screen
        lines to ``save_article``. Does nothing when *num* is falsy."""
        if not num:
            return
        self.send('{}\rOC'.format(num))
        raw_artcle = self.screen.display[:-1]
        status_line = self.screen.display[-1]
        if '[Y/n]' in status_line:
            self.send('n')
        # Keep advancing until the status line reports the end of the
        # article, collecting the line just above the status line each time.
        while '(100%)' not in status_line:
            self.send('OB')
            status_line = self.screen.display[-1]
            raw_artcle.append(self.screen.display[-2])
        self.save_article(num, raw_artcle)

    def term_comm(self, feed=None, wait=None):
        """Optionally send *feed*, read pending output and return the screen.

        The original definition lacked ``self`` although the body uses it,
        so it could not be called as a method.

        :param feed: data to write to the connection, or None to send nothing
        :param wait: truthy - also do a blocking read right after sending;
            False - skip the delayed non-blocking read
        :return: the screen lines joined by newlines, UTF-8 encoded
        """
        if feed is not None:
            self.conn.write(feed)
            # NOTE(review): nesting of this branch under the feed check is
            # reconstructed from collapsed source - confirm.
            if wait:
                s = self.conn.read_some()
                s = self.convert.conv_chunk(s)
                self.stream.feed(s.decode("utf-8"))
        # Equality (not identity) is kept on purpose so that 0 behaves like
        # False, exactly as in the original comparison.
        if wait != False:  # noqa: E712
            time.sleep(0.1)
            s = self.conn.read_very_eager()
            s = self.convert.conv_chunk(s)
            self.stream.feed(s.decode("utf-8"))
        return "\n".join(self.screen.display).encode("utf-8")

    def save_article(self, num, content):
        """Parse the header lines of an article and insert or update it.

        :param num: article id on the BBS, stored as ``bbs_id``
        :param content: list of screen lines; lines 0-2 are read as the
            author, title and time headers, the rest is the article body
        """
        chinese_keyword = {
            'board': '看板',
        }
        author_line = content[0].encode('utf-8').split()
        # Without the board keyword this is not treated as an article header.
        if chinese_keyword['board'] not in author_line:
            return
        _i = author_line.index(chinese_keyword['board'])
        author = ' '.join(author_line[1:_i])

        title = ' '.join(content[1].encode('utf-8').split()[1:])

        # Named pub_time so the local does not shadow the `time` module.
        pub_time = ' '.join(content[2].encode('utf-8').split()[1:])
        if '(' in pub_time:
            # Keep only the text between the first '(' and the first ')'.
            pub_time = pub_time[pub_time.find('(') + 1:pub_time.find(')')]
        # NOTE(review): the second field is dropped unconditionally; this
        # nesting is reconstructed from collapsed source - confirm.
        time_fields = pub_time.split()
        time_fields.pop(1)
        pub_time = ' '.join(time_fields)
        # Parenthesised form prints the same on Python 2 and Python 3.
        print(pub_time)

        article = '\n'.join(content[3:]).encode('utf-8')
        try:
            post = Teacher.get(bbs_id=num)
            post.content = article
            post.save()
            logger.info('Update: {id}'.format(id=num))
        except Teacher.DoesNotExist:
            post = Teacher.create(author=author,
                                  title=title,
                                  pub_time=pub_time,
                                  content=article,
                                  bbs_id=num
                                  )
            logger.info('Insert: {id}'.format(id=num))