def getff_tm(self, fn, enc='utf-8', dlm='\t', mdlm=None):
    """Read two-field lines from ``fn`` and record each pair in ``self.tm``.

    Every line is split on ``dlm`` into exactly two fields; the first becomes
    a key of ``self.tm`` and the second a key (with value ``1``) of the
    nested dict stored under it.

    Args:
        fn: File to read; passed through to ``utils.get_lines``.
        enc: Encoding passed to ``utils.get_lines``.
        dlm: Delimiter separating the two fields on each line.
        mdlm: Optional override for ``self.mdlm``. The resolved value is not
            used further inside this method.

    Raises:
        ValueError: If a line does not split into exactly two fields.
    """
    if not mdlm:
        mdlm = self.mdlm
    for line in utils.get_lines(fn, enc, strip=1):
        # morpheme mapping
        source, target = line.split(dlm)
        targets = self.tm.get(source, {})
        self.tm[source] = targets
        targets[target] = 1
def getff_pc(self, fn, enc=None, pdlm=None, ph=None):
    """Load delimited rows from ``fn`` into ``self.pc``.

    Lines starting with the prefix ``ph`` are header lines: the first one
    seen defines the column names, later ones are skipped. Lines that occur
    before any header are ignored. For every other line, ``self.pc`` maps
    the line's first field to the list of fields from the ``'[lyses]'``
    column onwards.

    Args:
        fn: File to read; passed through to ``utils.get_lines``.
        enc: Encoding passed to ``utils.get_lines``; falls back to
            ``self.de`` when falsy.
        pdlm: Field delimiter used for both the header and the data rows;
            falls back to ``self.pc_dlm`` when falsy.
        ph: Prefix that marks a header line; falls back to ``self.ph`` when
            falsy.

    Raises:
        KeyError: If a data row is reached and the header did not contain a
            ``'[lyses]'`` column.
    """
    enc = enc or self.de
    pdlm = pdlm or self.pc_dlm
    ph = ph or self.ph
    cats = {}
    for line in utils.get_lines(fn, enc, strip=1):
        if line.startswith(ph):
            if not cats:
                # Drop the whole header prefix (it may be longer than one
                # character) and split with the same delimiter as the data
                # rows, so the column indices line up with ``vals`` below.
                for i, cat in enumerate(line[len(ph):].split(pdlm)):
                    cats[cat] = i
            continue
        if not cats:
            # No header seen yet: column positions are unknown, skip.
            continue
        vals = line.split(pdlm)
        self.pc[vals[0]] = vals[cats['[lyses]']:]
def getff_lkp(self, fn, enc=None, dlm=None):
    """Read three-field lines from ``fn`` and collect them in ``self.lkp``.

    Each line is split on the delimiter into exactly three fields. The
    second field is added to the list stored in ``self.lkp`` under the first
    field; the third field is read but not used.

    Args:
        fn: File to read; passed through to ``utils.get_lines``.
        enc: Encoding passed to ``utils.get_lines``; falls back to
            ``self.de`` when falsy.
        dlm: Field delimiter; falls back to ``self.gen_dlm`` when falsy.

    Raises:
        ValueError: If a line does not split into exactly three fields.
    """
    if not enc:
        enc = self.de
    if not dlm:
        dlm = self.gen_dlm
    for line in utils.get_lines(fn, enc, strip=1):
        key, value, _cnt = line.split(dlm)
        existing = self.lkp.get(key, [])
        # A new list is built on every line; the previously stored list
        # object is never mutated.
        self.lkp[key] = existing + [value]
def getff_unts(self, fn, enc='utf-8'):
    """Store the result of ``utils.get_lines`` for ``fn`` in ``self.unts``.

    Args:
        fn: File to read; passed through to ``utils.get_lines``.
        enc: Encoding passed to ``utils.get_lines``.
    """
    # Whatever get_lines returns is stored as-is (presumably the stripped
    # lines of the file -- confirm against utils.get_lines).
    lines = utils.get_lines(fn, enc, strip=1)
    self.unts = lines
def getff_sfx(self, fn, enc='utf-8', dlm='\t', mdlm=None):
    """Read two-field lines from ``fn`` and record each pair in ``self.sfx``.

    Every line is split on ``dlm`` into exactly two fields; the first becomes
    a key of ``self.sfx`` and the second a key (with value ``1``) of the
    nested dict stored under it.

    Args:
        fn: File to read; passed through to ``utils.get_lines``.
        enc: Encoding passed to ``utils.get_lines``.
        dlm: Delimiter separating the two fields on each line.
        mdlm: Accepted but not used by this method.

    Raises:
        ValueError: If a line does not split into exactly two fields.
    """
    for line in utils.get_lines(fn, enc, strip=1):
        key, entry = line.split(dlm)
        entries = self.sfx.get(key, {})
        self.sfx[key] = entries
        entries[entry] = 1