def test():
    """Run every converter test case under ``cases`` and print a report.

    Each sub-directory of ``cases`` that contains an ``input`` file is one
    test case.  The input is read as UTF-8 and converted once per entry of
    ``VALIDVARIANTS`` with rule parsing enabled.  For each variant the
    elapsed time is printed; if the case also has an ``output.<variant>``
    file, the converter's result is compared with it and ``OK`` or
    ``FAILED`` is printed.  A conversion that raises is reported together
    with its traceback and the remaining variants still run.

    Nothing is returned; all results go to standard output (tracebacks are
    emitted by ``print_exc``).
    """
    # NOTE: these are Python 2 print statements; each one is given a single
    # parenthesised string so the emitted text is unchanged.
    for testn in sorted(listdir('cases')):
        input_path = join('cases', testn, 'input')
        if not exists(input_path):
            # Not a test case directory (no input file) -- skip silently.
            continue

        print('')
        print('-' * 40)
        print('')
        print('Running test %s ...' % testn)

        with codecs.open(input_path, 'r', 'utf-8') as source:
            inp = source.read()
        print('input size: %d' % len(inp))

        parse_rules = True
        for var in VALIDVARIANTS:
            print('')
            # The trailing comma keeps the cursor on this line so that the
            # timing (or the error notice) is printed right after it.
            print('testing %s ...' % var),
            started = time()
            try:
                outp = ConverterHandler(var).convert(inp, parse_rules)
            except Exception:
                print('error occurred.')
                print_exc()
                continue

            print('%.3f sec.' % (time() - started))

            expected_path = join('cases', testn, 'output.' + var)
            if not exists(expected_path):
                # No reference output for this variant; timing only.
                continue

            print('comparing converter\'s output with expected output ...')
            with codecs.open(expected_path, 'r', 'utf-8') as reference:
                expected = reference.read()
            print('OK' if expected == outp else 'FAILED')
def genLoc(filePath):
    """Yield 100000 search URLs for a fixed keyword converted to zh-hant.

    A hard-coded keyword is run through a ``ConverterHandler('zh-hant')``,
    UTF-8 encoded, URL-quoted and substituted into
    ``http://example.com/?q=%s``.  The same conversion is repeated for every
    yielded URL.

    Args:
        filePath: Path of a file that must be openable.  Its contents are
            not read; the argument is kept for interface compatibility.

    Yields:
        str: One URL per iteration.

    Raises:
        IOError: On the first iteration, if ``filePath`` cannot be opened.
    """
    # The file's contents are never used.  It is still opened so that a bad
    # path fails exactly as before, but the handle is now closed right away
    # instead of being leaked for the lifetime of the generator.
    with open(filePath):
        pass

    conv = ConverterHandler('zh-hant')
    word = u'女装连衣裙'
    for _ in xrange(100000):
        # NOTE(review): the conversion is loop-invariant if convert() has no
        # side effects; it is kept per-iteration because that cannot be
        # confirmed from here.
        converted = conv.convert(word.strip())
        keyword = urllib.quote(converted.encode('utf-8'))
        if not keyword:
            continue
        yield 'http://example.com/?q=%s' % keyword
DB = MySQLdb.connect(user="******",passwd="nishixian", db="kancolle_wiki",unix_socket="/opt/lampp/var/mysql/mysql.sock",charset="utf8") cursor = DB.cursor() query = "SELECT ship_id,ship_name FROM kancolle_ship_info"; conv_hack = [(u'巻',u'卷'), (u'蔵',u'藏'), (u'黒',u'黑'), (u'暁',u'晓'), (u'満',u'满'), (u'皐',u'皋'), (u'歳',u'岁')] if cursor.execute(query) > 0: result = list(cursor.fetchall()) for item in result: kanid = item[0] kanname = item[1] convhandler = ConverterHandler('zh-hans') output = convhandler.convert(kanname,False) for conv in conv_hack: output = output.replace(conv[0],conv[1]) print kanname,'==>',output query = "UPDATE kancolle_ship_info SET ship_name_sim = '%s' WHERE ship_id = %s" % (output, kanid) try: n = cursor.execute(query) except Exception: errcode,errordesc = e print 'Error!',errordesc DB.rollback() else: if n>0 : print 'Convert Success!' DB.commit() else: