示例#1
0
def get_nb_prob(pos_entries, index, probs_for_sense, stop_words):
  """Accumulate the score of one sense for the token at ``index``.

  The result is the sum of ``lookup_prob`` values for the current word, its
  POS tag and every local context word/POS pair, plus one term for each
  entry of ``probs_for_sense['global_context_words']``.

  NOTE(review): the global-context terms are log probabilities per the
  inline comments; presumably every ``lookup_prob`` value is one too, making
  the sum a naive Bayes log score -- confirm against ``lookup_prob``.

  Args:
    pos_entries: indexable collection whose items expose 'word' and 'pos'.
    index: position in ``pos_entries`` of the token being scored.
    probs_for_sense: mapping holding the tables 'cur_word', 'cur_word_pos',
      'context_words', 'context_poses' and 'global_context_words'; the last
      one maps a word to a pair indexed as [0] (word present) and [1]
      (word not present).
    stop_words: collection removed (via ``-``) from the global context set
      before the presence test.

  Returns:
    The accumulated score.
  """
  cur_word = pos_entries[index]['word']
  prob = lookup_prob(probs_for_sense['cur_word'], cur_word)

  cur_word_pos = pos_entries[index]['pos']
  prob += lookup_prob(probs_for_sense['cur_word_pos'], cur_word_pos)

  # Local context: one term per word pair, then one term per POS pair.
  context_word_pairs, context_pos_pairs = annotator.get_context_list(
      pos_entries, index, LOCAL_CONTEXT_WINDOW)

  for context_word_pair in context_word_pairs:
    prob += lookup_prob(probs_for_sense['context_words'], context_word_pair)

  for context_pos_pair in context_pos_pairs:
    prob += lookup_prob(probs_for_sense['context_poses'], context_pos_pair)

  # Global context: every word in the table contributes a term, whether or
  # not it occurs around the token, so the loop walks the table rather than
  # the observed context set.
  global_context_set = annotator.get_context_set(
      pos_entries, index, GLOBAL_CONTEXT_WINDOW) - stop_words

  # .items() rather than the Python-2-only .iteritems(): it exists on both
  # Python 2 and Python 3 dictionaries.
  for global_context_word, probs_pair in \
      probs_for_sense['global_context_words'].items():
    # probs_pair[0] is the "present" log prob, probs_pair[1] the
    # "not present" log prob.
    prob += (probs_pair[0] if global_context_word in global_context_set
             else probs_pair[1])

  return prob
示例#2
0
def update_features(pos_entries, index, counters_for_sense, stop_words):
  """Record the features of the token at ``index`` in ``counters_for_sense``.

  ``counters_for_sense`` is modified in place; nothing is returned.  The
  entries touched are 'cur_word', 'cur_word_pos', 'context_words',
  'context_poses', 'global_context_words' and 'num_examples'.

  NOTE(review): the first five are presumably ``collections.Counter``
  objects (they are used with ``+= 1`` on a key and with ``.update``) --
  confirm at the call site.

  Args:
    pos_entries: indexable collection whose items expose 'word' and 'pos'.
    index: position in ``pos_entries`` of the token being recorded.
    counters_for_sense: mapping of feature name to counter, plus the
      'num_examples' tally.
    stop_words: collection removed (via ``-``) from the global context set
      before it is counted.
  """
  token = pos_entries[index]

  # The token itself: its surface word, then its POS tag.
  counters_for_sense['cur_word'][token['word']] += 1
  counters_for_sense['cur_word_pos'][token['pos']] += 1

  # Local context: word pairs and POS pairs from the local window.
  word_pairs, pos_pairs = annotator.get_context_list(
      pos_entries, index, LOCAL_CONTEXT_WINDOW)
  for feature_name, observed in (('context_words', word_pairs),
                                 ('context_poses', pos_pairs)):
    counters_for_sense[feature_name].update(observed)

  # Global context: the wider window's word set with stop words removed.
  nearby_words = annotator.get_context_set(
      pos_entries, index, GLOBAL_CONTEXT_WINDOW)
  kept_words = nearby_words - stop_words
  counters_for_sense['global_context_words'].update(kept_words)

  counters_for_sense['num_examples'] += 1