class LMModel:
    """Two-class classifier built from a pair of language models.

    ``lm0`` is trained on the sequences of rows whose ``insult`` attribute is
    falsy and ``lm1`` on those where it is truthy.  A row is classified by
    passing the difference of the two models' scores through a logistic
    function.
    """

    def __init__(self, seqExtractor, n=2):
        """Store the extractor and create one ``KneserNeyLM(n)`` per class.

        Args:
            seqExtractor: object providing ``train(rows)`` and
                ``extract(row)``; the latter's result is what the language
                models are trained on and scored against.
            n: forwarded unchanged to both ``KneserNeyLM`` constructors
                (presumably the n-gram order -- confirm against KneserNeyLM).
        """
        self.words = None
        self.seqExtractor = seqExtractor
        self.lm0 = KneserNeyLM(n)
        self.lm1 = KneserNeyLM(n)

    def train(self, rows):
        """Fit the extractor and both language models on ``rows``.

        Args:
            rows: iterable of objects exposing an ``insult`` attribute.
        """
        # ``rows`` is traversed three times below.  A one-shot iterator
        # (generator, file, ...) would be exhausted after the first pass and
        # leave the language models trained on nothing, so materialise it.
        # Reusable containers are passed through untouched.
        if iter(rows) is rows:
            rows = list(rows)

        self.seqExtractor.train(rows)
        self.lm0.train(
            [self.seqExtractor.extract(row) for row in rows if not row.insult]
        )
        self.lm1.train(
            [self.seqExtractor.extract(row) for row in rows if row.insult]
        )

    def classify1(self, row):
        """Return the logistic of ``lm1.score - lm0.score`` for one row.

        The score difference is clamped to [-100, 100] before the
        exponential so that ``math.exp`` cannot raise ``OverflowError``.
        """
        seq = self.seqExtractor.extract(row)
        # The leading 0.0 forces float arithmetic even if scores are ints.
        w = 0.0 + self.lm1.score(seq) - self.lm0.score(seq)
        # Explicit comparisons (rather than min/max) keep a NaN difference
        # flowing through unchanged, exactly as before.
        if w > 100:
            w = 100.0
        elif w < -100:
            w = -100.0
        return 1.0 / (1.0 + math.exp(-w))

    def classify(self, rows):
        """Return an array holding ``classify1(row)`` for every row."""
        return array([self.classify1(row) for row in rows])
def __init__(self, seqExtractor, n=2):
    """Set up the instance: keep the extractor and build two KneserNeyLM(n).

    ``words`` starts out as None; ``n`` is forwarded unchanged to both
    language-model constructors (presumably the n-gram order -- confirm
    against KneserNeyLM).
    """
    self.words = None
    self.seqExtractor = seqExtractor
    # Tuple assignment evaluates left to right, so lm0's model is still
    # constructed before lm1's.
    self.lm0, self.lm1 = KneserNeyLM(n), KneserNeyLM(n)