Примеры использования LanguageModel в Python

Язык программирования: Python

Пространство имен/Пакет: languagemodel.srilm

Класс/Тип: LanguageModel

Примеров на hotexamples.com: 4

Найдено 4 примера использования Python LanguageModel. Это лучшие примеры кода на Python для languagemodel.srilm.LanguageModel, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество подборки.

Основные методы

Показать Скрыть

readNGram(1)

tokensProbability(1)

Пример #1

Показать файл

Файл: model.py Проект: zomux/chiropractor

 def __init__(self):
   """
   Build the model from values held by the setting module.

   Loads the required setting keys, stores the n-gram order as an int,
   announces the load on stderr, creates the LanguageModel and copies the
   three weight settings onto the instance.
   """
   required_keys = ["max_gram","file_lm","weight_word_penalty","weight_lm","weight_translate_costs"]
   setting.load(required_keys)
   self.maxGram = int(setting.max_gram)
   # Progress message goes to stderr before the language model is created.
   sys.stderr.write("[NovelDepStrModel] loading language model... \n")
   self.lm = LanguageModel(lm_file=setting.file_lm, n=setting.max_gram)
   (self.weight_word_penalty,
    self.weight_lm,
    self.weight_translate_costs) = (setting.weight_word_penalty,
                                    setting.weight_lm,
                                    setting.weight_translate_costs)

Пример #2

Показать файл

Файл: model.py Проект: zomux/chiropractor

 def __init__(self):
   """
   Build the model from values held by the setting module.

   Loads the required setting keys, stores the n-gram order as an int,
   announces the load on stderr, creates the LanguageModel, then keeps the
   weight vector, its length, and the vector without its last entry.
   Also starts with an empty LM probability cache.
   """
   required_keys = ["max_gram","file_lm","weights"]
   setting.load(required_keys)
   self.maxGram = int(setting.max_gram)
   # Progress message goes to stderr before the language model is created.
   sys.stderr.write("[GentileModel] Loading language model... \n")
   self.lm = LanguageModel(lm_file=setting.file_lm, n=setting.max_gram)
   loaded_weights = setting.weights
   self.weights = loaded_weights
   self.lenWeights = len(loaded_weights)
   # Rule scoring uses every weight except the last one.
   self.weightsForRules = loaded_weights[:-1]
   self.cacheLMProbs = {}

Пример #3

Показать файл

Файл: model.py Проект: zomux/chiropractor

class GentileModel:
  """
  class for presenting the model in Gentile Model

  Holds a language model and a weight vector read from the setting module,
  and scores rules and hypotheses as weighted sums of their cost lists.
  """
  # { rule->cost object }
  # costs : static costs : dynamic costs : lm
  # score : costs * weights
  maxGram = None          # int(setting.max_gram), set in __init__
  lm = None               # LanguageModel instance, set in __init__
  weights = None          # setting.weights, set in __init__
  lenWeights = None       # len(weights)
  weightsForRules = None  # weights without the last entry
  cacheMode = False       # not read by any active code in this class

  cacheLMProbs = None     # reset to {} in __init__; not read by active code
  smode = False           # when true, getSentenseCost wraps tokens in <s> ... </s>

  def __init__(self):
    """
    Initiate the language model
    initiate the model with weights in setting file
    """
    setting.load(["max_gram","file_lm","weights"])
    self.maxGram = int(setting.max_gram)
    sys.stderr.write("[GentileModel] Loading language model... \n")
    self.lm = LanguageModel(lm_file=setting.file_lm, n=setting.max_gram)
    self.weights = setting.weights
    self.lenWeights = len(self.weights)
    self.weightsForRules = self.weights[:-1]
    self.cacheLMProbs = {}

  def calculateRuleScore(self,rule):
    """
    Calculate the score of a rule as the weighted sum of its costs.

    The costs are read from rule[2]; cost i is multiplied by
    weightsForRules[i].  An empty cost list scores 0.

    @type rule: indexable whose item 2 is a sequence of numbers
    @rtype: number
    """
    costs = rule[2]
    # sum() instead of reduce(): works on Python 2 and 3 and accepts an
    # empty cost list.
    return sum(cost*self.weightsForRules[i] for i,cost in enumerate(costs))

  def sortRules(self,rules,limit=None):
    """
    For each [rules] , calculate the score , and return the rules sorted by
    descending score, keeping only the first [limit] of them.
    The input list is not modified.  Rules with equal scores keep their
    original relative order.

    @type rules: list of rules accepted by calculateRuleScore
    @type limit: number; when falsy, setting.size_beam is used
    @rtype: list of rules
    """
    if not limit:
      limit = setting.size_beam

    scores = [self.calculateRuleScore(rule) for rule in rules]
    # sorted() builds a real list of indexes; range(...).sort() only exists
    # on Python 2.
    order = sorted(range(len(rules)), key=lambda idx: scores[idx], reverse=True)
    return [rules[idx] for idx in order[:limit]]

  def sortHypothesises(self, hyps, limit=None):
    """
    Sort hypothesises in place by descending score and return the first
    [limit] of them.

    @type hyps: list of GentileHypothesises (objects with a .score attribute)
    @type limit: number; when falsy, setting.size_cube_pruning is used
    @rtype: list of GentileHypothesises
    """
    if not limit:
      limit = setting.size_cube_pruning

    hyps.sort(key=lambda x: x.score,reverse=True)
    return hyps[:limit]

  def calculateHypothesisScore(self,hyp):
    """
    Get score of a hypothesis by summing hyp.costs[i] * weights[i] over all
    lenWeights weights.

    @type hyp: GentileHypothesis (object with a .costs sequence)
    @rtype: number
    """
    return sum(hyp.costs[i]*self.weights[i] for i in range(self.lenWeights))

  def calculateScore(self, costs):
    """
    Calculate score of a pure cost list: sum of costs[i] * weights[i].

    @type costs: sequence of numbers
    @rtype: number
    """
    return sum(cost*self.weights[i] for i,cost in enumerate(costs))

  def getSentenseCost(self,tokens):
    """
    Return the language model probability of a token list.

    An empty list, or (outside smode) a list holding only the empty string,
    yields the fixed value -0.15 without querying the language model.
    In smode the tokens are wrapped in "<s>" ... "</s>" before the query;
    the wrapping is done on a copy so the caller's list is left untouched.

    @type tokens: list of string
    @rtype: float
    """
    if not tokens:
      return -0.15

    if self.smode:
      # Build a new list: inserting into the argument would stack extra
      # sentence markers if the caller reuses the same list.
      tokens = ["<s>"] + list(tokens) + ["</s>"]
    elif len(tokens) == 1 and tokens[0] == "":
      return -0.15

    return self.lm.tokensProbability(tokens)

  def clearCache(self):
    """
    clear lm cache
    !!! deprecated: currently a no-op, the cache is left as it is
    """
    pass

Пример #4

Показать файл

Файл: model.py Проект: zomux/chiropractor

class NovelDepStrModel(Model):
  """
  class for presenting the model in Novel Dep-to-string Model

  Computes and caches a cost object per rule string, using a language model
  and weights read from the setting module.
  """
  # { rule->cost object }
  # cost object : { word_penalty:,lm_cost:,       table_cost:,future_cost:,rule_cost: }
  #                 |-----no weight------|        |-----------weighted--------------|
  # Class-level default only; __init__ gives every instance its own cache.
  mapRuleCostObj = {}
  maxGram = None                 # int(setting.max_gram), set in __init__
  lm = None                      # LanguageModel instance, set in __init__
  weight_word_penalty = None
  weight_lm = None
  weight_translate_costs = None  # indexed by translation cost position

  def __init__(self):
    """
    Initiate the language model
    initiate the model with weights in setting file
    """
    setting.load(["max_gram","file_lm","weight_word_penalty","weight_lm","weight_translate_costs"])
    self.maxGram = int(setting.max_gram)
    sys.stderr.write("[NovelDepStrModel] loading language model... \n")
    self.lm = LanguageModel(lm_file=setting.file_lm, n=setting.max_gram)
    self.weight_word_penalty = setting.weight_word_penalty
    self.weight_lm = setting.weight_lm
    self.weight_translate_costs = setting.weight_translate_costs
    # Cached cost objects depend on this instance's weights, so the cache
    # must not be shared through the class attribute.
    self.mapRuleCostObj = {}

  def calcLMforRule(self,tgts):
    """
    Split the language model cost of a target side into two parts.

    Example: A B C X D E X in 3 gram.  "X" resets the n-gram window.
    lcost = sum(cost of full maxGram-grams)
    fcost = sum(cost of the shorter n-grams at the start of each window)

    @type tgts: list of string
    @rtype: (lcost,fcost)
    """
    ngram_start_pos = 0
    fcost,lcost = 0.0,0.0
    for i,word in enumerate(tgts):
      if word == "X":
        # reset start position when meet x
        ngram_start_pos = i+1
        continue
      if i+1-ngram_start_pos < self.maxGram:
        # window still shorter than max gram : future cost
        fcost += self.lm.readNGram(tgts[ngram_start_pos:i+1])
      else:
        # full window : language cost of the last maxGram words
        lcost += self.lm.readNGram(tgts[i-self.maxGram+1:i+1])
    return lcost,fcost

  def calcRuleCost(self,rule):
    """
    Calculate the cost object of a rule.

    The rule must consist of exactly six fields separated by " ||| ";
    only the third field (target words, comma separated) and the sixth
    field (translation costs, space separated floats) are used.
    When setting.runningMode is "mert", each weighted translation cost is
    also stored under the key 'tcost_<i>'.

    @type rule: string
    @rtype: dict with keys word_penalty, future_cost, lm_cost, table_cost,
            rule_cost
    @raise ValueError: when the rule does not have six fields or a
                       translation cost is not a float
    """
    objcost = {}
    _head,_src,tgt,_align,_pfreq,probs = rule.split(" ||| ")
    tgts = tgt.split(",")

    # Negative count of the target words that are not the "X" placeholder.
    objcost['word_penalty'] = -(len(tgts) - tgts.count("X"))

    lcost,fcost = self.calcLMforRule(tgts)
    objcost['future_cost'] = fcost*self.weight_lm
    objcost['lm_cost'] = lcost

    translate_costs = [float(p) for p in probs.split(" ")]
    weighted_costs = [self.weight_translate_costs[i]*cost
                      for i,cost in enumerate(translate_costs)]

    objcost['table_cost'] = sum(weighted_costs, 0.0)
    # The running mode does not change inside this call, so test it once.
    if setting.runningMode == "mert":
      for i,weighted in enumerate(weighted_costs):
        objcost['tcost_'+str(i)] = weighted

    objcost['rule_cost'] = self.weight_word_penalty*objcost['word_penalty'] + \
                        self.weight_lm*objcost['lm_cost'] + \
                        objcost['table_cost']

    return objcost

  def calcHypCost(self,hyp):
    """
    calculate the cost of hyp
    Currently returns the constant 1.0 for every hypothesis.

    @type hyp: NovelDepStrHypothesis
    @rtype: float
    """
    return 1.0

  def sortRules(self,rules,limit=1000):
    """
    For each [rules] , calculate the cost , and return sorted rules
    but only keep [limit] rules.
    The list is sorted in place by descending rule_cost + future_cost.

    @type rules: list of string
    @type limit: number
    @rtype: list of string
    """
    mapCurrentRuleCost = {}

    for rule in rules:
      # getRuleCostObj does the cache lookup / fill.
      objcost = self.getRuleCostObj(rule)
      mapCurrentRuleCost[ rule ] = objcost['rule_cost'] + objcost['future_cost']

    rules.sort(key=lambda x:mapCurrentRuleCost[x],reverse=True)
    return rules[:limit]

  def getRuleCostObj(self,rule):
    """
    Return the cost object of a rule, computing and caching it on first use.

    @type rule: string
    @rtype: object
    """
    # NOTE(review): the cache is keyed by hash(rule), so two different rules
    # with the same hash would share one cost object.
    hash_key = hash(rule)
    if hash_key not in self.mapRuleCostObj:
      self.mapRuleCostObj[ hash_key ] = self.calcRuleCost(rule)
    return self.mapRuleCostObj[hash_key]

  def getSentenseCost(self,tokens):
    """
    Return the average language model cost over all maxGram-sized windows
    of the tokens.  A list shorter than maxGram is read as a single window.

    @type tokens: list of string
    @rtype: float
    """
    # At least one window, even when there are fewer tokens than maxGram.
    iters = max(1, len(tokens) - self.maxGram + 1)
    prob = 0.0
    for ibegin in range(iters):
      prob += self.lm.readNGram(tokens[ibegin:ibegin+self.maxGram])

    return prob/iters