Python Ngrammodel примеры использования

Язык программирования: Python

Пространство имен/Пакет: splitter.ngrammodel

Класс/Тип: Ngrammodel

Примеров на hotexamples.com: 2

Найдено 2 примера использования Python-класса Ngrammodel. Это лучшие примеры кода на Python для splitter.ngrammodel.Ngrammodel, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

loadSentences(1)

trainNgramModel(1)

Пример #1

Показать файл

Файл: SentenceSplitter.py Проект: phanigadde/MSRCSWork

 def __init__(self):
   """Start with empty sentence stores and two fresh Ngrammodel instances."""
   # Per the original notes, entries are word records:
   #   sentences       -> (word, tag, lang)
   #   scoredSentences -> (word, tag, lang, hiScore, enScore)
   self.sentences, self.scoredSentences = [], []
   # The right-hand side is evaluated left to right, so the `en` model is
   # still constructed before the `hi` model.
   self.enTrigramModel, self.hiTrigramModel = Ngrammodel(), Ngrammodel()

Пример #2

Показать файл

Файл: SentenceSplitter.py Проект: phanigadde/MSRCSWork

class SentenceSplitter:
  """Score every word of tagged sentences against two n-gram models.

  Sentences are read from a CSV file as lists of ``(word, tag, lang)``
  tuples. Each word is then scored against the ``hi`` and the ``en`` model
  (presumably Hindi and English -- confirm against the training corpora),
  using at most the two preceding words of the sentence as context.
  """

  # Score substituted when a model returns exactly 0 for a word, so that
  # callers never see a zero score.
  UNSEEN_PROB = 0.001

  def __init__(self):
    """Create empty sentence stores and two untrained Ngrammodel objects."""
    self.sentences = []  # [[(word, tag, lang), ...], ...] one list per sentence
    self.scoredSentences = []  # [[(word, tag, lang, hiScore, enScore), ...], ...]
    self.enTrigramModel = Ngrammodel()
    self.hiTrigramModel = Ngrammodel()

  def trainLMs(self, enCorpus, hiCorpus, n):
    """Load a corpus into each model and train both with order ``n``.

    ``enCorpus`` and ``hiCorpus`` are passed unchanged to
    ``Ngrammodel.loadSentences``; ``n`` is passed to
    ``Ngrammodel.trainNgramModel``.
    """
    self.enTrigramModel.loadSentences(enCorpus)
    self.enTrigramModel.trainNgramModel(n)
    self.hiTrigramModel.loadSentences(hiCorpus)
    self.hiTrigramModel.trainNgramModel(n)

  def loadSentences(self, sentencesCSV):
    """Append the sentences found in ``sentencesCSV`` to ``self.sentences``.

    The first row is skipped (presumably a header -- confirm against the
    data files). A row whose first column is empty, or a row with no
    columns at all, ends the current sentence; every other row becomes one
    tuple of the current sentence.
    """
    csvLines = readlinesFromCSV(sentencesCSV)
    sent = []
    for line in csvLines[1:]:
      # `not line` guards against completely empty rows, for which
      # `line[0]` would raise IndexError.
      if not line or line[0] == u'':
        self.sentences.append(sent)
        sent = []
      else:
        sent.append(tuple(line))
    # Keep the final sentence when the file does not end with a separator
    # row; previously it was silently discarded.
    if sent:
      self.sentences.append(sent)

  def scoreWithTrigamLM(self, word, context, lm):
    """Return ``lm.prob(word, history)``, floored at ``UNSEEN_PROB``.

    ``history`` is a tuple of at most the last two items of ``context``.
    Shorter contexts are passed as they are (no padding). A result of
    exactly 0 is replaced by ``UNSEEN_PROB``.
    """
    history = tuple(context[-2:])
    prob = lm.prob(word, history)
    if prob == 0:
      prob = self.UNSEEN_PROB
    return prob

  def scoreSentence(self, sentIndex):
    """Return sentence ``sentIndex`` with a hi and an en score per word.

    Each ``(word, tag, lang)`` entry becomes
    ``(word, tag, lang, hiScore, enScore)``. The context used for a word
    is the list of words that precede it in the same sentence.
    """
    sentence = self.sentences[sentIndex]
    context = []
    newSentence = []
    for word, tag, lang in sentence:
      hiScore = self.scoreWithTrigamLM(word, context, self.hiTrigramModel)
      enScore = self.scoreWithTrigamLM(word, context, self.enTrigramModel)
      newSentence.append((word, tag, lang, hiScore, enScore))
      context.append(word)
    return newSentence

  def scoreSentences(self):
    """Rebuild ``self.scoredSentences`` by scoring every loaded sentence."""
    self.scoredSentences = [
      self.scoreSentence(sentIndex)
      for sentIndex in range(len(self.sentences))
    ]

  def sanityCheck(self):
    """Print the number of scored sentences and, if any, the first one."""
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(len(self.scoredSentences))
    if self.scoredSentences:
      print(self.scoredSentences[0])

  def analyzeSentences(self):
    """Print each scored sentence, one word per line, pausing after each.

    After every sentence the method waits for a line of console input
    before continuing.
    """
    try:
      waitForEnter = raw_input  # Python 2
    except NameError:
      waitForEnter = input  # Python 3 renamed raw_input to input
    for sentence in self.scoredSentences:
      rows = [' '.join(str(field) for field in entry) for entry in sentence]
      print('\n'.join(rows) + '\n')
      waitForEnter()