def __init__(self): # initialize all "global" data logger.debug('loading...') logger.debug(' corpus...') # FIXME: using absolute paths is the easiest way to make us work from cmdline and invoked # in a web app. perhaps we could set up softlinks in /var/ to make this slightly more respectable. self.g = GramsBin( '/home/pizza/proj/spill-chick/data/corpus/google-ngrams/word.bin', '/home/pizza/proj/spill-chick/data/corpus/google-ngrams/ngram3.bin' ) self.w = Words(NGram3BinWordCounter(self.g.ng)) logger.debug(' phon') self.p = Phon(self.w, self.g) logger.debug('done.') # sanity-check junk """
# NOTE(review): the next line is the tail of a function whose `def` lies above
# this chunk (whitespace-collapsed in this view). It is reproduced untouched
# because its header and indentation cannot be seen from here.
if x == y: return 0 damlev = ngd.diff.damlev sx, sy = p.phraseSound([x]), p.phraseSound([y]) if sx == sy and sx: # sound the same, e.g. there/their. consider these equal. return damlev # otherwise, calculate phonic/edit difference return max(damlev, min(NGramDiffScore.overlap(sx, sy), abs(len(x) - len(y))))

# Script entry point: when this module is run directly, load the corpus,
# word list and phonetic index into the module-level names g, w and p.
if __name__ == '__main__':
    import sys
    # Extend the module search path with '..' before the project imports
    # below -- presumably so grambin/word/phon resolve when run from this
    # directory; verify against the repository layout.
    sys.path.append('..')
    from grambin import GramsBin
    from word import Words, NGram3BinWordCounter
    from phon import Phon
    import logging
    # send log records of level DEBUG and above to stderr
    logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)
    logging.debug('loading...')
    # NOTE(review): absolute, machine-specific corpus paths
    g = GramsBin(
        '/home/pizza/proj/spill-chick/data/corpus/google-ngrams/word.bin',
        '/home/pizza/proj/spill-chick/data/corpus/google-ngrams/ngram3.bin')
    # word counter is built from the n-gram data held by g
    w = Words(NGram3BinWordCounter(g.ng))
    # NOTE(review): the function tail above reads a name `p`; whether that is
    # this module-level object or a parameter cannot be told from this chunk.
    p = Phon(w, g)
    logging.debug('loaded.')
    # no-op placeholder; nothing further runs after loading
    pass