def E_step(self, model, N_processes=4):
    """Run one EM expectation step over ``self.data.lines``.

    Collects expected segment counts under ``model``, either serially or
    fanned out across ``N_processes`` worker processes via a
    ``multiprocessing`` pool.

    Parameters:
        model: current model; its parameters drive forward-backward.
        N_processes: number of worker processes; values <= 1 run serially.

    Returns:
        (LL, ec): total data log-likelihood, and a ``Model`` built from
        the accumulated expected counts.
    """
    # Expected counts; defaultdict(int) is the idiomatic spelling of
    # the original defaultdict(lambda: 0) (same default value, 0).
    Pseg = defaultdict(int)
    LL = 0
    if N_processes > 1:
        # Publish the current model/params/lines as module-level globals so
        # forked children inherit them (copy-on-write under fork) instead of
        # pickling them per task.
        global shared_model, shared_params, shared_lines
        shared_model = model
        shared_params = self.params
        shared_lines = self.data.lines
        # Worker i is responsible for lines[n] where n % N_processes == i.
        pool = mp.Pool(N_processes)
        try:
            results = pool.map(parallel_forward_backwards,
                               [(N_processes, i) for i in xrange(N_processes)])
        finally:
            # BUGFIX: the original only called close(); join() reaps the
            # workers, and the try/finally prevents a pool leak if map raises.
            pool.close()
            pool.join()
        # Sum the per-worker results.
        for logprob_i, counts_i in results:
            LL += logprob_i
            for seg in counts_i:
                Pseg[seg] += counts_i[seg]  # accumulate the expected counts
    else:
        # Serial path: one forward-backward pass per sentence.
        # (Original used enumerate() but never used the index.)
        for line in self.data.lines:
            logprob_i, counts_i = MonotoneFSTUtil.forward_backward(
                model, self.params, line)
            LL += logprob_i
            for seg in counts_i:
                Pseg[seg] += counts_i[seg]  # accumulate the expected counts
    ec = Model(Pseg)
    return LL, ec
def viterbi_decode(self, lines=None):
    """Viterbi-decode each input line under the current model.

    Parameters:
        lines: iterable of lines to decode; when None, the dev lines are
            preprocessed with ``process_lines`` using the current params.

    Returns:
        A list with one entry per line: ``[segmented_string] + list(decode)``
        where ``decode`` is whatever ``MonotoneFSTUtil.viterbi`` returned
        (its first element is the segmentation indices).
    """
    if lines is None:
        lines = process_lines(self.data.lines_dev,
                              self.params.remove_whitespace,
                              self.params.chop)
    output = []
    for line_no, sentence in enumerate(lines):
        result = MonotoneFSTUtil.viterbi(self.model, self.params,
                                         sentence, line_no=line_no)
        indices = result[0]
        segmented = SegUtil.segment_str(sentence, indices)
        output.append([segmented] + list(result))
    return output
def E_step(self, model, N_processes=4):
    """Run one EM expectation step over ``self.data.lines``.

    NOTE(review): this definition appears to be duplicated verbatim
    elsewhere in the file — confirm whether both copies are needed or
    whether one shadows the other.

    Collects expected segment counts under ``model``, either serially or
    fanned out across ``N_processes`` worker processes via a
    ``multiprocessing`` pool.

    Parameters:
        model: current model; its parameters drive forward-backward.
        N_processes: number of worker processes; values <= 1 run serially.

    Returns:
        (LL, ec): total data log-likelihood, and a ``Model`` built from
        the accumulated expected counts.
    """
    # Expected counts; defaultdict(int) is the idiomatic spelling of
    # the original defaultdict(lambda: 0) (same default value, 0).
    Pseg = defaultdict(int)
    LL = 0
    if N_processes > 1:
        # Publish the current model/params/lines as module-level globals so
        # forked children inherit them (copy-on-write under fork) instead of
        # pickling them per task.
        global shared_model, shared_params, shared_lines
        shared_model = model
        shared_params = self.params
        shared_lines = self.data.lines
        # Worker i is responsible for lines[n] where n % N_processes == i.
        pool = mp.Pool(N_processes)
        try:
            results = pool.map(parallel_forward_backwards,
                               [(N_processes, i) for i in xrange(N_processes)])
        finally:
            # BUGFIX: the original only called close(); join() reaps the
            # workers, and the try/finally prevents a pool leak if map raises.
            pool.close()
            pool.join()
        # Sum the per-worker results.
        for logprob_i, counts_i in results:
            LL += logprob_i
            for seg in counts_i:
                Pseg[seg] += counts_i[seg]  # accumulate the expected counts
    else:
        # Serial path: one forward-backward pass per sentence.
        # (Original used enumerate() but never used the index.)
        for line in self.data.lines:
            logprob_i, counts_i = MonotoneFSTUtil.forward_backward(
                model, self.params, line)
            LL += logprob_i
            for seg in counts_i:
                Pseg[seg] += counts_i[seg]  # accumulate the expected counts
    ec = Model(Pseg)
    return LL, ec