示例#1
0
from DB import DB
import json
import bz2

# Create the DB object and invoke its processing methods in a fixed order.
# Each method is looked up by name only when its turn comes, so the call
# sequence is loadMails -> tokenize -> stem -> phrases, one after another.
db = DB()
for stage in ('loadMails', 'tokenize', 'stem', 'phrases'):
    getattr(db, stage)()

# Build a JSON-serialisable mapping of mail records, keyed by each mail's
# 'mailfile' attribute.
mails = {}
for m in db.mails:
    # vars(m) is the mail's live attribute dict, so the conversions below
    # are applied to the mail object itself (unchanged from before).
    d = vars(m)
    d['sents'] = list(d['sents'])
    for k, v in d.items():
        # json cannot encode sets. isinstance also catches frozenset and
        # set subclasses, which an exact type comparison would let through
        # only for json.dump to reject them with a TypeError.
        if isinstance(v, (set, frozenset)):
            # Rebinding an existing key does not resize the dict, so it is
            # safe while iterating over items().
            d[k] = list(v)
    mails[d['mailfile']] = d

# Write both structures as bz2-compressed JSON text. The context managers
# close each file deterministically, so the compressed stream is flushed
# and finalised even if json.dump raises part-way through.
with bz2.open('mails.json.bz2', 'wt') as mails_out:
    json.dump(mails, mails_out)

# Resolve the term map before opening the output file, so a missing
# attribute fails without leaving an empty archive behind.
term_map = db.stemmer.termMap
with bz2.open('StemmerTermMap.json.bz2', 'wt') as term_map_out:
    json.dump(term_map, term_map_out)