def build_paths(self):
    """Collect, for every corpus key, the automaton paths that emit it.

    The search starts from the single path ``("^",)``, filed under the
    empty emission ``()``.  In each pass every known path is extended by
    one transition taken from ``self.aut.m``; the emission of the target
    state (``()`` when the state has no entry in ``self.aut.emissions``)
    is appended to the string emitted so far.  An extension is dropped
    when the resulting string is not in the result of
    ``closure_and_top_sort`` applied to the corpus keys.

    Passes stop as soon as every corpus key has been visited as an
    emitted string, or when a whole pass discovers no new path (the
    remaining keys can then never be reached, and looping further would
    never end).

    Side effect: ``self.paths`` is replaced by a plain ``dict`` mapping
    each reached corpus key to the set of its paths whose last state is
    ``"$"``.  Corpus keys that were never reached are absent.
    """
    aut = self.aut
    corpus_strings = list(self.corpus.keys())
    # Only strings in this set are worth keeping; anything else is pruned.
    allowed = set(closure_and_top_sort(corpus_strings))

    self.paths = defaultdict(set)
    self.paths[()].add(("^",))
    needed = set(corpus_strings)

    # NOTE(review): the loop ends once every corpus key has been visited,
    # so paths that would only be discovered in later passes are not
    # collected.  This matches the previous behaviour -- confirm it is
    # intended.
    while needed:
        grew = False
        # Iterate over a snapshot: the body inserts new keys into
        # self.paths, which is not allowed while iterating a live dict view.
        for s, paths in list(self.paths.items()):
            # Copy the set as well: an empty emission files the extended
            # path under the same key that is being walked.
            for path in paths.copy():
                last = path[-1]
                if last not in aut.m:
                    continue
                for tgt in aut.m[last]:
                    emission = (aut.emissions[tgt]
                                if tgt in aut.emissions else ())
                    new_s = s + emission
                    if new_s not in allowed:
                        continue
                    new_path = path + (tgt,)
                    bucket = self.paths[new_s]
                    if new_path not in bucket:
                        bucket.add(new_path)
                        grew = True
            needed.discard(s)
        if not grew:
            # Nothing new was found, so the next pass would be identical:
            # whatever is still needed is unreachable.  Stop instead of
            # spinning forever.
            break

    wanted = set(corpus_strings)
    self.paths = {
        s: {p for p in paths if p[-1] == "$"}
        for s, paths in self.paths.items()
        if s in wanted
    }
def probability_of_strings(self, strings):
    """Map strings to the value stored for them under the ``"$"`` state.

    ``strings`` is handed to ``closure_and_top_sort``; the first element
    of the returned ordering is skipped (the empty string, per the
    original comment).  The remaining elements are processed in order
    with ``self.update_probability_of_string``, sharing one memo obtained
    from ``self.init_memo()``.

    Returns a dict with one entry per processed element, i.e. for every
    element of the ordering except the first, not only for the strings
    passed in.  Each value is read from the memo at index
    ``self.state_indices["$"]`` (presumably the probability of ending in
    the final state -- confirm against ``update_probability_of_string``).
    """
    # Drop the leading element of the ordering (the empty string).
    ordered = closure_and_top_sort(strings)[1:]
    table = self.init_memo()
    probabilities = {}
    for current in ordered:
        # Fills the memo entry for `current`; the ordering is expected to
        # make earlier entries available first.
        self.update_probability_of_string(current, table)
        row = table[current]
        final_index = self.state_indices["$"]
        probabilities[current] = row[final_index]
    return probabilities