def get_nb_prob(pos_entries, index, probs_for_sense, stop_words):
    """Accumulate the score of one sense for the word at ``pos_entries[index]``.

    The result is the sum of the per-feature values found in
    ``probs_for_sense`` (the inline notes in this function call them log
    probs, so presumably this is a Naive Bayes log-score -- confirm against
    the code that builds ``probs_for_sense``).

    Args:
        pos_entries: Indexable sequence of entries; each entry is read via
            its ``'word'`` and ``'pos'`` keys.
        index: Position in ``pos_entries`` of the word being scored.
        probs_for_sense: Mapping with the keys ``'cur_word'``,
            ``'cur_word_pos'``, ``'context_words'``, ``'context_poses'`` and
            ``'global_context_words'``.  The first four are passed to
            ``lookup_prob``; the last maps each word to an indexable pair
            ``(present_value, not_present_value)``.
        stop_words: Collection subtracted (with ``-``) from the global
            context set, so it must support set difference with whatever
            ``annotator.get_context_set`` returns.

    Returns:
        The accumulated score for this sense.
    """
    entry = pos_entries[index]

    # Features of the target word itself: surface form and POS tag.
    prob = lookup_prob(probs_for_sense['cur_word'], entry['word'])
    prob += lookup_prob(probs_for_sense['cur_word_pos'], entry['pos'])

    # Local context features: word pairs and POS pairs around the target.
    context_word_pairs, context_pos_pairs = annotator.get_context_list(
        pos_entries, index, LOCAL_CONTEXT_WINDOW)
    for context_word_pair in context_word_pairs:
        prob += lookup_prob(probs_for_sense['context_words'],
                            context_word_pair)
    for context_pos_pair in context_pos_pairs:
        prob += lookup_prob(probs_for_sense['context_poses'],
                            context_pos_pair)

    # Global context features, with stop words removed.
    global_context_set = annotator.get_context_set(
        pos_entries, index, GLOBAL_CONTEXT_WINDOW) - stop_words

    # Every known global-context word contributes exactly one term: the
    # "present" value if it occurs in the window, the "not present" value
    # otherwise.  ``items()`` is used instead of ``iteritems()`` so the loop
    # runs on both Python 2 and Python 3.
    global_word_probs = probs_for_sense['global_context_words']
    for global_context_word, probs_pair in global_word_probs.items():
        if global_context_word in global_context_set:
            prob += probs_pair[0]  # present log prob
        else:
            prob += probs_pair[1]  # not present log prob

    return prob
def update_features(pos_entries, index, counters_for_sense, stop_words):
    """Record the features of one training example in ``counters_for_sense``.

    ``counters_for_sense`` is mutated in place; nothing is returned.

    Args:
        pos_entries: Indexable sequence of entries; the entry at ``index``
            is read via its ``'word'`` and ``'pos'`` keys.
        index: Position in ``pos_entries`` of the example word.
        counters_for_sense: Mapping with the keys ``'cur_word'``,
            ``'cur_word_pos'``, ``'context_words'``, ``'context_poses'``,
            ``'global_context_words'`` and ``'num_examples'``.  The first
            five must support ``[key] += 1`` and/or ``.update(iterable)``
            (presumably ``collections.Counter`` objects -- confirm at the
            call site); ``'num_examples'`` holds a number.
        stop_words: Collection subtracted (with ``-``) from the global
            context set before it is counted.
    """
    target = pos_entries[index]

    # The target word itself, then its part-of-speech tag.
    counters_for_sense['cur_word'][target['word']] += 1
    counters_for_sense['cur_word_pos'][target['pos']] += 1

    # Word pairs and POS pairs from the local window around the target.
    nearby_word_pairs, nearby_pos_pairs = annotator.get_context_list(
        pos_entries, index, LOCAL_CONTEXT_WINDOW)
    counters_for_sense['context_words'].update(nearby_word_pairs)
    counters_for_sense['context_poses'].update(nearby_pos_pairs)

    # Words from the wider window, excluding stop words.
    wide_context = annotator.get_context_set(
        pos_entries, index, GLOBAL_CONTEXT_WINDOW)
    counters_for_sense['global_context_words'].update(
        wide_context - stop_words)

    # One more example has been seen for this sense.
    counters_for_sense['num_examples'] += 1