def test_k_in_order(setup):
    input_, top, k = setup
    result = topk(k, input_)
    start = -1e4
    for x in top:
        assert result[x] > start
        start = result[x]
def conditional(self, context, target: Optional[List[str]] = None, limit: Optional[int] = None, raw: bool = False, **kwargs):
    """Get the conditional probabilities of the next tokens.

    :param context: The tokens
    :param target: A list of values that you want the conditional prob of P(target | context)
    :param limit: The number of (next word, score) pairs to return
    :param raw: Should you just return the raw softmax values? This will override the limit argument

    :returns: The conditional probs of a specific target word if `target` is defined,
        the top `limit` softmax scores for the next step, or the raw softmax numpy
        array if `raw` is set
    """
    if kwargs.get('preproc', None) is not None:
        logger.warning("Warning: Passing `preproc` to `LanguageModelService.conditional` is deprecated.")
    tokens_batch = self.batch_input(context)
    self.prepare_vectorizers(tokens_batch)
    batch_dict = self.vectorize(tokens_batch)
    next_softmax = self.model.predict(batch_dict)[:, -1, :]
    next_softmax = to_numpy(next_softmax)
    if target is not None:
        target_batch = [[t] for t in target]
        self.prepare_vectorizers(target_batch)
        target_batch = self.vectorize(target_batch)
        target = target_batch[self.model.tgt_key]
        return np.array([v.item() for v in next_softmax[np.arange(next_softmax.shape[0]), target]])
    if raw:
        return next_softmax
    limit = next_softmax.shape[-1] if limit is None else limit
    scores = [topk(limit, soft) for soft in next_softmax]
    return [{self.idx_to_token[k]: v for k, v in score.items()} for score in scores]
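A hypothetical usage sketch of the three return modes. The `lm` instance and the token batch here are made up for illustration; only the `target`/`limit`/`raw` keyword behavior comes from the method above.

# `lm` stands in for a loaded LanguageModelService instance (hypothetical).
context = [['the', 'dog', 'barked', 'at']]

# P(target | context) for specific candidate next words
probs = lm.conditional(context, target=['the', 'a'])

# The top 5 (next word, score) pairs for the next step
top5 = lm.conditional(context, limit=5)

# The full softmax distribution as a raw numpy array (overrides limit)
dist = lm.conditional(context, raw=True)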
def predict_one(self, src, encoder_outputs, **kwargs):
    K = int(kwargs.get('beam', 5))
    mxlen = int(kwargs.get('mxlen', 100))
    paths = [[Offsets.GO] for _ in range(K)]
    # Which beams are done?
    done = np.array([False] * K)
    scores = np.array([0.0] * K)
    hidden, output_i, context = self.arc_policy(encoder_outputs, self.hsz, beam_width=K)
    num_states = len(hidden)
    rnn_state = self.decoder_rnn.initial_state(hidden)
    self.attn_cache(context)
    src_mask = encoder_outputs.src_mask
    for i in range(mxlen):
        dst_last = np.array([path[-1] for path in paths]).reshape(1, K)
        embed_i = self.tgt_embeddings.encode(dst_last)[-1]
        embed_i = self.input_i(embed_i, output_i)
        rnn_state = rnn_state.add_input(embed_i)
        rnn_output_i = rnn_state.output()
        output_i = self.attn(rnn_output_i, src_mask)
        wll = self.prediction([output_i])[-1].npvalue()  # (V,) K
        V = wll.shape[0]
        if i > 0:
            wll = wll.T
            expanded_history = np.expand_dims(scores, -1)
            # 1 for beams that are still alive, 0 for finished beams
            done_mask = np.expand_dims((done == False).astype(np.uint8), -1)
            done_mask_inv = (done_mask != 1).astype(np.uint8)
            # NOTE: eos_mask is all zeros here, so `mask` is all ones and
            # `removed_eos` equals `negged_wll`; finished beams are instead
            # handled by forcing EOS in the path-extension loop below.
            eos_mask = np.zeros((1, V)).astype(np.uint8)
            mask = ((done_mask & eos_mask) != 1).astype(np.uint8)
            # Zero the word scores of finished beams ...
            masked_wll = np.multiply(done_mask, wll)
            # ... and push them far down so they are never expanded again
            negged_wll = masked_wll + (done_mask_inv * -1e4)
            removed_eos = np.multiply(mask, negged_wll)
            sll = removed_eos + expanded_history
        else:
            sll = wll.T
        flat_sll = sll.reshape(-1)
        bests = topk(K, flat_sll)
        best_idx_flat = np.array(list(bests.keys()))
        best_beams = best_idx_flat // V
        best_idx = best_idx_flat % V
        new_paths = []
        new_done = []
        hidden = rnn_state.s()
        new_hidden = [[] for _ in range(num_states)]
        for j, best_flat in enumerate(best_idx_flat):
            beam_id = best_beams[j]
            best_word = best_idx[j]
            if done[j]:
                new_paths.append(paths[beam_id] + [Offsets.EOS])
            else:
                new_paths.append(paths[beam_id] + [best_word])
            if best_word == Offsets.EOS:
                done[j] = True
            new_done.append(done[beam_id])
            scores[j] = bests[best_flat]
            # For each path, we need to pick that out and add it to the hiddens
            # This will be (c1, c2, ..., h1, h2, ...)
            for h_i, h in enumerate(hidden):
                new_hidden[h_i].append(dy.pick_batch_elem(h, beam_id))
        done = np.array(new_done)
        new_hidden = [dy.concatenate_to_batch(new_h) for new_h in new_hidden]
        paths = new_paths
        rnn_state = self.decoder_rnn.initial_state(new_hidden)
    paths = np.stack([p[1:] for p in paths])
    return paths, scores
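The masking arithmetic in the decoder above is easier to see in isolation. Here is a standalone numpy sketch with toy shapes and made-up values; it uses `wll` already in (K, V) layout and folds the zero-then-penalize steps into one line, but the effect on finished beams is the same.

import numpy as np

K, V = 2, 4
done = np.array([False, True])     # beam 1 has already emitted EOS
scores = np.array([-1.0, -0.5])    # running path scores, one per beam
wll = np.random.rand(K, V)         # toy word scores per beam

done_mask = np.expand_dims((done == False).astype(np.uint8), -1)  # (K, 1)
done_mask_inv = (done_mask != 1).astype(np.uint8)

# Live beams keep their word scores plus history; finished beams get a
# -1e4 penalty on every word so topk never expands them with a real token.
sll = done_mask * wll + done_mask_inv * -1e4 + np.expand_dims(scores, -1)
assert (sll[0] > -10).all() and (sll[1] < -9000).all()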
def beam_decode(self, src, src_len, K=2):
    GO = self.vocab2['<GO>']
    EOS = self.vocab2['<EOS>']
    dy.renew_cg()
    paths = [[GO] for _ in range(K)]
    # Which beams are done?
    done = np.array([False] * K)
    scores = np.array([0.0] * K)
    rnn_enc_seq, hidden = self.encode(src, src_len)
    context_mx = dy.concatenate_cols(rnn_enc_seq)
    # To vectorize, we need to expand along the batch dimension, K times
    final_encoder_state_k = (dy.concatenate_to_batch([h] * K) for h in hidden)
    num_states = len(hidden)
    rnn_state = self.decoder_rnn.initial_state(final_encoder_state_k)
    attn_fn = self._attn(context_mx)
    output_i = dy.concatenate_to_batch([rnn_enc_seq[-1]] * K)
    for i in range(100):
        dst_last = np.array([path[-1] for path in paths]).reshape(1, K)
        embed_i = self.embed_out(dst_last)[-1]
        embed_i = self.input_i(embed_i, output_i)
        rnn_state = rnn_state.add_input(embed_i)
        rnn_output_i = rnn_state.output()
        output_i = attn_fn(rnn_output_i)
        wll = self.prediction(output_i).npvalue()  # (V,) K
        V = wll.shape[0]
        if i > 0:
            expanded_history = scores.reshape(scores.shape + (1,))  # scores = K
            # TODO: dont add anything when the beam is done
            sll = wll.T + expanded_history
        else:
            sll = wll.T
        flat_sll = sll.reshape(-1)
        bests = topk(K, flat_sll)
        best_idx_flat = np.array(list(bests.keys()))
        best_beams = best_idx_flat // V
        best_idx = best_idx_flat % V
        new_paths = []
        new_done = []
        hidden = rnn_state.s()
        # For each hidden state
        new_hidden = [[] for _ in range(num_states)]
        for j, best_flat in enumerate(best_idx_flat):
            beam_id = best_beams[j]
            best_word = best_idx[j]
            if best_word == EOS:
                done[j] = True
            new_done.append(done[beam_id])
            new_paths.append(paths[beam_id] + [best_word])
            scores[j] = bests[best_flat]
            # For each path, we need to pick that out and add it to the hiddens
            # This will be (c1, c2, ..., h1, h2, ...)
            for h_i, h in enumerate(hidden):
                new_hidden[h_i] += [dy.pick_batch_elem(h, beam_id)]
        done = np.array(new_done)
        new_hidden = [dy.concatenate_to_batch(new_h) for new_h in new_hidden]
        paths = new_paths
        # Now comes the hard part, fix the hidden units
        # Copy the beam states of the winners
        rnn_state = self.decoder_rnn.initial_state(new_hidden)
    return [p[1:] for p in paths], scores
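Both decoders pick the K best continuations out of the flattened (K, V) score matrix, then recover the source beam and the word id with integer division and modulo. A small self-contained numpy illustration (using `argsort` as a stand-in for the `topk` helper's keys):

import numpy as np

K, V = 3, 5
sll = np.arange(K * V, dtype=np.float64).reshape(K, V)  # toy scores
flat = sll.reshape(-1)
best_idx_flat = np.argsort(flat)[-K:]  # indices of the K largest scores
best_beams = best_idx_flat // V        # which beam each winner came from
best_idx = best_idx_flat % V           # which word in the vocabulary
assert all(sll[b, w] == flat[f]
           for b, w, f in zip(best_beams, best_idx, best_idx_flat))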
def test_k_values_are_correct(setup):
    input_, top, k = setup
    result = topk(k, input_)
    # Use `idx` rather than shadowing the fixture's `k`
    for idx, v in result.items():
        assert v == input_[idx]
def test_k_are_correct(setup):
    input_, top, k = setup
    result = topk(k, input_)
    for x in top:
        assert x in result
def test_k_drawn(setup):
    input_, top, k = setup
    result = topk(k, input_)
    assert len(result) == k
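Together these tests pin down the contract of the `topk` helper used throughout this section: given `k` and a 1D array, it returns a dict of exactly `k` index/value pairs covering the `k` largest entries. A minimal sketch consistent with those assertions (not necessarily the library's actual implementation):

import numpy as np

def topk(k, probs):
    # Return {index: value} for the k largest entries of a 1D array.
    # argpartition puts the k largest (in no particular order) at the end.
    idx = np.argpartition(probs, -k)[-k:]
    return {int(i): probs[i] for i in idx}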