Exemplo n.º 1
0
def main():
	"""Group the records of the input file and dump them as JavaScript.

	Reads the file named by ``sys.argv[1]``, parses every line with
	``get_a_line`` and merges consecutive records for which ``equal``
	is true by concatenating their ``'data'`` values.  Each finished
	group is appended to the module-level ``result`` list, and the first
	record of every group is passed through ``update_pool``.

	Afterwards every entry of ``result`` gets a ``'feat'`` key computed by
	``find_feature`` from the module-level ``pool`` sorted longest-first.
	Two files are written to the current directory: ``word_pool.js``
	(``var pool=...;``) and ``data.js`` (``var database=...;``).
	"""
	# NOTE(review): Updater looks like a periodic progress reporter that
	# displays ``u.output`` -- confirm against its definition.
	u = Updater(0.5)

	# The whole file is read up front, so the handle can be released before
	# the processing starts.
	with open(sys.argv[1], 'r') as f:
		last = update_pool(get_a_line(f.readline()))
		lines = f.readlines()

	u.start()
	try:
		for l in lines:
			r = get_a_line(l)
			if equal(r, last):
				# Same group as the previous line: accumulate its data.
				last['data'] = last['data'] + r['data']
			else:
				result.append(last)
				last = update_pool(r)
				u.output = '%s.%s%s%s' % (
					len(result), last['city'], last['area'], last['road'])
		# The loop only flushes a group when the next one begins, so the
		# final group has to be flushed explicitly or it would be lost.
		result.append(last)

		# Longest entries first, which is the order handed to find_feature.
		pool_dup = sorted(pool, reverse=True, key=len)
		for r in result:
			r['feat'] = find_feature(
				pool_dup, r['city'] + r['area'] + r['road'])
			u.output = 'changing:%s%s%s,%s' % (
				r['city'], r['area'], r['road'], r['feat'])
	finally:
		# Always stop the updater, even when parsing fails half-way.
		u.stop()

	print('writing files')
	with codecs.open('word_pool.js', 'w', 'utf-8') as po:
		po.write(u'var pool=')
		po.write(json.dumps(pool_dup, encoding='utf8', ensure_ascii=False))
		po.write(u';')
	with codecs.open('data.js', 'w', 'utf-8') as ro:
		ro.write(u'var database=')
		ro.write(json.dumps(result, encoding='utf8', ensure_ascii=False))
		ro.write(u';')
Exemplo n.º 2
0
def main():
	"""Write every distinct character of the input file to extracted.txt.

	Reads the UTF-8 file named by ``sys.argv[1]``, collects each distinct
	character except line feed, carriage return and the ideographic space
	(U+3000), and writes the collected characters in sorted order, with no
	separators, to ``extracted.txt`` (UTF-8) in the current directory.
	"""
	# NOTE(review): Updater looks like a periodic progress reporter that
	# displays ``u.output`` -- confirm against its definition.
	u = Updater(0.5)
	skipped = (u'\n', u'\u3000', u'\r')
	extracted = set()

	# The whole file is read up front, so the handle can be released before
	# the processing starts.
	with codecs.open(sys.argv[1], 'r', 'utf8') as f:
		lines = f.readlines()

	u.start()
	try:
		for l in lines:
			for n in l:
				if n not in skipped:
					extracted.add(n)
					u.output = n
	finally:
		# Always stop the updater, even when the scan fails half-way.
		u.stop()

	# ``with`` guarantees the output file is flushed and closed on error too.
	with codecs.open('extracted.txt', 'w', 'utf-8') as op:
		op.write(u''.join(sorted(extracted)))