Пример #1
0
    def build_paths(self):
        """
        Collect, for every corpus key, the automaton paths that emit it.

        Starting from the single path ``("^",)`` (which has emitted nothing),
        paths are repeatedly extended by one transition taken from
        ``self.aut.m``.  An extension is kept only when the sequence emitted
        so far belongs to ``closure_and_top_sort(self.corpus.keys())``
        (presumably the prefix closure of the corpus -- confirm against that
        helper); every other extension is pruned.

        On return ``self.paths`` is a plain dict mapping each corpus key that
        was reached to the set of its paths whose last state is ``"$"``.
        Corpus keys that were never reached are absent from the dict.
        """
        aut = self.aut
        self.paths = defaultdict(set)
        self.paths[()].add(("^",))
        corpus_keys = set(self.corpus.keys())
        needed = set(corpus_keys)
        top_sort_needed = set(closure_and_top_sort(self.corpus.keys()))

        # Extend the known paths pass by pass, pruning every extension whose
        # emitted sequence falls outside the closure of the corpus.
        while needed:
            progressed = False
            # Iterate over a snapshot: the body inserts new keys into
            # self.paths, which a live dict view does not tolerate.
            for s, paths in list(self.paths.items()):
                for path in paths.copy():
                    last = path[-1]
                    if last not in aut.m:
                        continue

                    for tgt in aut.m[last]:
                        emission = (aut.emissions[tgt]
                                    if tgt in aut.emissions else ())
                        new_s = s + emission
                        if new_s not in top_sort_needed:
                            continue
                        new_path = path + (tgt,)
                        bucket = self.paths[new_s]
                        if new_path not in bucket:
                            bucket.add(new_path)
                            progressed = True
                        # s counts as handled once one of its paths has
                        # been extended inside the closure.
                        needed.discard(s)

            # A pass that adds nothing would repeat identically forever, so
            # any key still in ``needed`` cannot be produced: stop here
            # instead of spinning.
            if not progressed:
                break

        # Keep only complete paths (ending in "$") of actual corpus keys.
        self.paths = dict(
            (s, set(p for p in paths if p[-1] == "$"))
            for s, paths in self.paths.items() if s in corpus_keys)
Пример #2
0
    def probability_of_strings(self, strings):
        """
        Map strings to their probabilities.

        ``strings`` is a list of strings.  It is expanded and ordered with
        ``closure_and_top_sort``; the first entry of that ordering (the
        empty string, per the original author's note) is skipped.  Every
        remaining entry is processed in order with
        ``update_probability_of_string`` and its probability is read from
        the memo at the index of the ``"$"`` state.

        Returns a dict keyed by each processed string.
        """
        # Drop the leading entry of the ordering (the empty string).
        pending = closure_and_top_sort(strings)[1:]

        memo = self.init_memo()
        probabilities = {}

        for current in pending:
            self.update_probability_of_string(current, memo)
            row = memo[current]
            probabilities[current] = row[self.state_indices["$"]]

        return probabilities