# Build a SimpleLanguageModel from the sentences of a SoNaR corpus and save it.
#
# syntax: ./make_sonar_lm.py sonar_dir output_file n [category]
#   sonar_dir    corpus directory, handed to Corpus
#   output_file  path handed to lm.save()
#   n            integer handed to SimpleLanguageModel (default: 3)
#   category     optional collection restriction handed to Corpus (default: "")

import sys
import os

# sys.path[0] is the directory of the running script; make the tree two levels
# above it importable and publish the same location through PYTHONPATH.
# NOTE(review): the PYTHONPATH export is presumably meant for child
# processes -- confirm it is still needed.
sys.path.append(sys.path[0] + '/../..')
os.environ['PYTHONPATH'] = sys.path[0] + '/../..'

from pynlpl.formats.sonar import Corpus
from pynlpl.lm.lm import SimpleLanguageModel

# Fail with a usage message instead of an IndexError traceback when the two
# required arguments are missing.
if len(sys.argv) < 3:
    sys.stderr.write(
        "syntax: ./make_sonar_lm.py sonar_dir output_file n [category]\n"
    )
    sys.exit(2)

outputfile = sys.argv[2]

# Optional arguments are read by position. A malformed n is reported rather
# than silently replaced by the default (which previously also discarded the
# category argument).
n = 3
if len(sys.argv) > 3:
    try:
        n = int(sys.argv[3])
    except ValueError:
        sys.stderr.write("error: n must be an integer, got %r\n" % sys.argv[3])
        sys.exit(2)

restrictcollection = sys.argv[4] if len(sys.argv) > 4 else ""

lm = SimpleLanguageModel(n)

# NOTE(review): 'tok' is passed straight to Corpus; presumably it selects the
# tokenised documents -- confirm against pynlpl.formats.sonar.Corpus.
for doc in Corpus(sys.argv[1], 'tok', restrictcollection):
    for sentence_id, sentence in doc.sentences():
        # Echo each sentence id as it is processed.
        print(sentence_id)
        # Each token unpacks to (word, id, pos, lemma); only the word is fed
        # to the model. Underscore names avoid clobbering the builtin id().
        words = [word for word, _id, _pos, _lemma in sentence]
        lm.append(words)

lm.save(outputfile)
# Build a SimpleLanguageModel from the sentences of a SoNaR corpus and save it.
#
# syntax: ./make_sonar_lm.py sonar_dir output_file n [category]
#   sonar_dir    corpus directory, handed to Corpus
#   output_file  path handed to lm.save()
#   n            integer handed to SimpleLanguageModel (default: 3)
#   category     optional collection restriction handed to Corpus (default: "")

# sys.argv is used below, so import sys here to keep this section
# self-contained.
import sys

from pynlpl.formats.sonar import Corpus
from pynlpl.lm.lm import SimpleLanguageModel

# Fail with a usage message instead of an IndexError traceback when the two
# required arguments are missing.
if len(sys.argv) < 3:
    sys.stderr.write(
        "syntax: ./make_sonar_lm.py sonar_dir output_file n [category]\n"
    )
    sys.exit(2)

outputfile = sys.argv[2]

# Optional arguments are read by position. A malformed n is reported rather
# than silently replaced by the default (which previously also discarded the
# category argument).
n = 3
if len(sys.argv) > 3:
    try:
        n = int(sys.argv[3])
    except ValueError:
        sys.stderr.write("error: n must be an integer, got %r\n" % sys.argv[3])
        sys.exit(2)

restrictcollection = sys.argv[4] if len(sys.argv) > 4 else ""

lm = SimpleLanguageModel(n)

# NOTE(review): 'tok' is passed straight to Corpus; presumably it selects the
# tokenised documents -- confirm against pynlpl.formats.sonar.Corpus.
for doc in Corpus(sys.argv[1], 'tok', restrictcollection):
    for sentence_id, sentence in doc.sentences():
        # Echo each sentence id as it is processed.
        print(sentence_id)
        # Each token unpacks to (word, id, pos, lemma); only the word is fed
        # to the model. Underscore names avoid clobbering the builtin id().
        words = [word for word, _id, _pos, _lemma in sentence]
        lm.append(words)

lm.save(outputfile)
#!/usr/bin/env python
#-*- coding:utf-8 -*-

# Load a saved SimpleLanguageModel and print the score it assigns to a
# sentence given on the command line.
#
# syntax: ./query_lm.py lm_file sentence
#   lm_file   path handed to SimpleLanguageModel.load()
#   sentence  every remaining argument, passed as a list to scoresentence()

import os
import sys

# The tree two levels above the script directory (sys.path[0]) is added to the
# import path and also published through PYTHONPATH.
pynlpl_root = sys.path[0] + '/../..'
sys.path.append(pynlpl_root)
os.environ['PYTHONPATH'] = pynlpl_root

from pynlpl.lm.lm import SimpleLanguageModel

model_path = sys.argv[1]
tokens = sys.argv[2:]

model = SimpleLanguageModel()
model.load(model_path)

print(model.scoresentence(tokens))