def prob_of_transition(transition, counts):
    """Return the relative frequency of a transition given its prefix.

    The count stored under the serialized ``transition`` is divided by the
    count stored under the serialized prefix ``transition[:-1]`` (the same
    sequence without its last element).

    Args:
        transition: Sliceable sequence of tags; it is passed to
            ``serialize_tags`` both whole and with its last element removed.
        counts: Mapping from serialized tag sequences to their counts.

    Returns:
        The ratio of the two counts as a float.

    Raises:
        KeyError: If either serialized key is missing from ``counts``
            (assuming ``counts`` is a plain dict).
        ZeroDivisionError: If the count stored for the prefix is zero.
    """
    # Look up the full sequence first, then the prefix, so a missing key
    # surfaces in the same order as before.
    full_key = serialize_tags(transition)
    numerator = counts[full_key]

    prefix_key = serialize_tags(transition[:-1])
    denominator = counts[prefix_key]

    # Force float division regardless of the numeric type of the counts.
    return float(numerator) / denominator
def store_transitions(tags):
    """Accumulate chunk counts for one tag sequence.

    Counts are kept across calls in a dict stored on the function itself as
    ``store_transitions._counts``. The dict is created on the first call
    with a single ``'START'`` entry set to 0, and every call increments
    ``'START'`` by one.

    For each chunk size 1, 2 and 3 (clamped to ``len(tags)``), every item
    yielded by ``sets_of_tags(tags, chunk_size=...)`` is serialized with
    ``serialize_tags`` and its counter is incremented.

    NOTE(review): when ``tags`` has fewer than three elements the clamped
    chunk size repeats, so the same chunk size is processed more than once.
    Confirm this is intended.

    Args:
        tags: Sequence of tags; must support ``len()`` and be accepted by
            ``sets_of_tags``.

    Returns:
        None. The result is the updated ``store_transitions._counts``.
    """
    # Lazily create the persistent counter table on first use.
    try:
        table = store_transitions._counts
    except AttributeError:
        table = store_transitions._counts = dict(START=0)

    # One more sequence has been seen.
    table['START'] += 1

    for width in (1, 2, 3):
        # Never ask for chunks longer than the sequence itself.
        clamped = min(width, len(tags))
        for chunk in sets_of_tags(tags, chunk_size=clamped):
            key = serialize_tags(chunk)
            table[key] = table.get(key, 0) + 1