def E_step(self, model, N_processes=4):
        # initialize the expected counts
        Pseg = defaultdict(float)
        LL = 0

        if N_processes > 1:
            global shared_model, shared_params, shared_lines
            # put the current model, params and lines into global variables that will be shared with child processes.
            shared_model = model
            shared_params = self.params
            shared_lines = self.data.lines

            # each process i handles lines[n] for which n % N_processes == i
            # (a sketch of parallel_forward_backwards is given after this class)
            pool = mp.Pool(N_processes)
            results = pool.map(parallel_forward_backwards,
                               [(N_processes, i) for i in range(N_processes)])
            pool.close()
            pool.join()

            # sum the results.
            for logprob_i, counts_i in results:
                LL += logprob_i
                for seg in counts_i:
                    Pseg[seg] += counts_i[seg]      # accumulate the expected counts

        else:
            # go over each sentence serially and collect expected counts
            for line in self.data.lines:
                logprob_i, counts_i = MonotoneFSTUtil.forward_backward(
                    model, self.params, line)
                LL += logprob_i

                for seg in counts_i:
                    Pseg[seg] += counts_i[seg]      # accumulate the expected counts

        ec = Model(Pseg)
        return LL, ec

    def viterbi_decode(self, lines=None):
        if lines is None:
            lines = process_lines(self.data.lines_dev, self.params.remove_whitespace, self.params.chop)

        decodes = []
        for i, line in enumerate(lines):
            decode_i = MonotoneFSTUtil.viterbi(self.model, self.params, line, line_no=i)
            I = decode_i[0]  # the first field of the decode is the segmentation of the line
            decodes.append([SegUtil.segment_str(line, I)] + list(decode_i))
        return decodes
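
E_step dispatches to a module-level parallel_forward_backwards worker that is not shown in this example. The sketch below is an assumption rather than the original implementation: it reads the shared_model, shared_params, and shared_lines globals set in E_step, unpacks the (N_processes, i) task tuple, and processes every line whose index n satisfies n % N_processes == i. It also assumes `from collections import defaultdict` and `import multiprocessing as mp` at module level.

def parallel_forward_backwards(args):
    # Hypothetical worker sketch (assumed, not from the original source).
    # Runs the forward-backward pass over this worker's slice of the shared
    # lines and returns its partial log-likelihood and expected counts.
    N_processes, i = args
    LL = 0
    counts = defaultdict(float)
    for line in shared_lines[i::N_processes]:  # lines[n] with n % N_processes == i
        logprob, counts_line = MonotoneFSTUtil.forward_backward(
            shared_model, shared_params, line)
        LL += logprob
        for seg in counts_line:
            counts[seg] += counts_line[seg]
    # return a plain dict so the parent simply merges it key by key
    return LL, dict(counts)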
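
For reference, a hedged usage sketch of the decoder output; `segmenter` stands in for an instance of the (unnamed) class these methods belong to. Each entry of the returned list starts with the segmented string, followed by the raw fields of decode_i.

# Hypothetical usage (the class and its construction are not shown above).
decodes = segmenter.viterbi_decode()  # decodes the dev lines by default
for entry in decodes:
    segmented, I = entry[0], entry[1]  # entry[0] is the segmented string, entry[1] the segmentation I
    print(segmented)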