def __init__(self, order=3, sb="<s>", se="</s>", raw=False, ml=False):
    """Set up empty per-order count tables.

    Args:
        order: Number of count tables to allocate; also forwarded to
            ``NGramStack``.
        sb: Stored as ``self.sb`` (presumably the sentence-begin
            marker -- confirm against callers).
        se: Stored as ``self.se`` (presumably the sentence-end
            marker -- confirm against callers).
        raw: Flag stored unchanged as ``self.raw``.
        ml: Flag stored unchanged as ``self.ml``.
    """
    self.order = order
    self.sb, self.se = sb, se
    self.raw, self.ml = raw, ml
    self.ngrams = NGramStack(order=order)
    # One float-valued table per level; unseen keys read as 0.0.
    self.counts = [defaultdict(float) for _ in xrange(order)]
def __init__(self, order=3, sb="<s>", se="</s>"):
    """Allocate the empty accumulators used by this model.

    Args:
        order: Forwarded to ``NGramStack``; also sizes the tables
            below (``order`` rows for ``CoC``, ``order - 1`` entries
            for everything else that is per-level).
        sb: Stored as ``self.sb`` (presumably the sentence-begin
            marker -- confirm against callers).
        se: Stored as ``self.se`` (presumably the sentence-end
            marker -- confirm against callers).
    """
    self.order = order
    self.sb, self.se = sb, se
    self.ngrams = NGramStack(order=order)

    # Tables that exist for every level except one.
    levels = order - 1
    self.denominators = [defaultdict(float) for _ in xrange(levels)]
    self.numerators = [defaultdict(float) for _ in xrange(levels)]
    self.nonZeros = [defaultdict(float) for _ in xrange(levels)]
    self.discounts = [0.0] * levels

    # Four zero-initialised slots per order.
    # NOTE(review): presumably count-of-counts -- confirm.
    self.CoC = [[0.0] * 4 for _ in xrange(order)]

    # Scalar and per-key accumulators, both starting at zero.
    self.UD = 0.0
    self.UN = defaultdict(float)
def __init__(self, order=3, sb="<s>", se="</s>"):
    """Allocate the empty accumulators used by this model.

    Args:
        order: Forwarded to ``NGramStack``; also sizes the tables
            below (``order`` rows for ``CoC``, ``order - 1`` entries
            for everything else that is per-level).
        sb: Stored as ``self.sb`` (presumably the sentence-begin
            marker -- confirm against callers).
        se: Stored as ``self.se`` (presumably the sentence-end
            marker -- confirm against callers).
    """

    def _float_table():
        # Factory for the inner level of the nested nonZeros tables.
        return defaultdict(float)

    self.order = order
    self.sb, self.se = sb, se
    self.ngrams = NGramStack(order=order)

    # Tables that exist for every level except one.
    levels = order - 1
    self.denominators = [defaultdict(float) for _ in xrange(levels)]
    self.numerators = [defaultdict(float) for _ in xrange(levels)]

    # Modified Kneser-Ney needs each individual N_i tracked separately,
    # whereas plain Kneser-Ney only needs their sum total; hence a
    # two-level mapping here rather than a flat one.
    self.nonZeros = [defaultdict(_float_table) for _ in xrange(levels)]

    # Four zero-initialised slots per order.
    # NOTE(review): presumably count-of-counts -- confirm.
    self.CoC = [[0.0] * 4 for _ in xrange(order)]

    # Three discount values per level, all starting at zero.
    self.discounts = [[0.0] * 3 for _ in xrange(levels)]

    # Scalar and per-key accumulators, both starting at zero.
    self.UD = 0.0
    self.UN = defaultdict(float)