def get_answerable_logits(self, contextual_embedding, p_mask):
    """Compute log-probabilities of the "answerable" classification.

    The start scores are turned into masked attention weights, used to pool
    the contextual embeddings, and the pooled vector is concatenated with the
    embedding of the first position before being scored.

    Parameters
    ----------
    contextual_embedding
        Shape (batch_size, sequence_length, C)
    p_mask
        Shape (batch_size, sequence_length)
        Mask over the sequence.
        0 --> the element is masked,
        1 --> the element is not masked

    Returns
    -------
    answerable_logits
        Shape (batch_size, 2)
    """
    # Per-position scores, shape (batch_size, sequence_length)
    position_scores = np.squeeze(self.start_scores(contextual_embedding), -1)
    attention = masked_softmax(position_scores, p_mask, axis=-1)

    # (B, 1, T) x (B, T, C) -> (B, 1, C), then drop the singleton axis
    pooled = npx.batch_dot(np.expand_dims(attention, axis=1), contextual_embedding)
    pooled = np.squeeze(pooled, 1)

    # Feature of the first position of every sequence
    first_position = contextual_embedding[:, 0, :]

    combined = np.concatenate([pooled, first_position], axis=-1)
    return npx.log_softmax(self.answerable_scores(combined), axis=-1)
def forward(self, user_id, seq, item_id):
    """Score ``item_id`` for ``user_id`` given the interaction sequence ``seq``.

    Two optional convolution branches run over the embedded sequence: the
    ``conv_v`` branch (enabled when ``self.d_prime`` is truthy) and the
    ``conv_h`` + ``max_pool`` branch (enabled when ``self.d`` is truthy).
    Only the enabled branches are concatenated, so the layer also works when
    exactly one of them is switched off.

    Parameters
    ----------
    user_id
        User indices fed to ``self.P``.
    seq
        Item-index sequences fed to ``self.Q``.
        Presumably shape (batch_size, sequence_length) -- TODO confirm.
    item_id
        Target item indices fed to ``self.Q_prime`` and ``self.b``.

    Returns
    -------
    res
        One score per example (sum over axis 1 plus the item bias).

    Raises
    ------
    ValueError
        If both ``self.d_prime`` and ``self.d`` are falsy, i.e. there is no
        convolutional feature to feed into ``self.fc``.
    """
    # Insert a channel axis so the embedded sequence can be convolved.
    item_embs = np.expand_dims(self.Q(seq), 1)
    user_emb = self.P(user_id)

    conv_features = []

    # Branch built from ``conv_v``; flattened to (batch, fc1_dim_v).
    if self.d_prime:
        out_v = self.conv_v(item_embs)
        conv_features.append(out_v.reshape(out_v.shape[0], self.fc1_dim_v))

    # Branch built from ``conv_h``: each filter is followed by ReLU and its
    # paired max-pooling layer, sliding along the sequence (time) axis.
    if self.d:
        out_hs = []
        for conv, maxp in zip(self.conv_h, self.max_pool):
            conv_out = np.squeeze(npx.relu(conv(item_embs)), axis=3)
            out_hs.append(np.squeeze(maxp(conv_out), axis=2))
        conv_features.append(np.concatenate(out_hs, axis=1))

    if not conv_features:
        raise ValueError("At least one of d_prime and d must be non-zero.")
    # Concatenating a ``None`` placeholder would fail, so join only what exists.
    if len(conv_features) == 1:
        out = conv_features[0]
    else:
        out = np.concatenate(conv_features, axis=1)

    z = self.fc(self.dropout(out))
    # Combine the sequence representation with the user embedding.
    x = np.concatenate([z, user_emb], axis=1)
    # Score against the target item embedding and add the item bias.
    q_prime_i = np.squeeze(self.Q_prime(item_id))
    b = np.squeeze(self.b(item_id))
    res = (x * q_prime_i).sum(1) + b
    return res
def forward(self, user_id, item_id):
    """Predict a score for every (user, item) pair.

    The score is the dot product (sum over axis 1) of the user and item
    latent factors plus the squeezed user bias and item bias.

    Parameters
    ----------
    user_id
        User indices looked up in ``self.P`` and ``self.user_bias``.
    item_id
        Item indices looked up in ``self.Q`` and ``self.item_bias``.

    Returns
    -------
    The flattened array of predicted scores.
    """
    user_factors = self.P(user_id)
    item_factors = self.Q(item_id)
    user_offset = np.squeeze(self.user_bias(user_id))
    item_offset = np.squeeze(self.item_bias(item_id))

    interaction = (user_factors * item_factors).sum(axis=1)
    scores = interaction + user_offset + item_offset
    return scores.flatten()
def _training_cell_state_transform(previous_cell_state,
                                   weighted_inputs,
                                   forget_rates) -> Tuple[np.ndarray, np.ndarray]:
    """
    Update the SSRU cell state over all time steps at training time.

    :param previous_cell_state: Initial cell state; its leading axis (axis 0) is squeezed away
                                before the recurrence and restored on the returned last state.
    :param weighted_inputs: Per-step inputs, iterated over by ``npx.foreach``.
    :param forget_rates: Per-step forget rates, iterated over together with ``weighted_inputs``.
    :return: The stacked cell states of all steps and the last step state with axis 0 re-added.
    """

    def _step(inputs_and_rate, state_before) -> Tuple[np.ndarray, np.ndarray]:
        """
        Advance the SSRU cell state by a single time step.

        :param inputs_and_rate: List = [step_input, forget_rate]
        :param state_before: cell state at (t-1)
        :return: the new state twice: the first copy is stacked into the foreach output,
                 the second is carried over as the state for the next step.
        """
        # each of shape (batch_size, model_size)
        step_input, forget_rate = inputs_and_rate
        state_now = forget_rate * state_before + step_input
        return state_now, state_now

    initial_state = np.squeeze(previous_cell_state, axis=0)
    # (max_length, batch, input_depth), (batch, input_depth)
    all_states, final_state = npx.foreach(_step, [weighted_inputs, forget_rates], initial_state)
    return all_states, np.expand_dims(final_state, axis=0)
def _func(*states):
    """Run the wrapped cell for one step.

    ``states[0]`` is the current step index and ``states[1:]`` are the cell
    states. Returns the cell output and the new state list, whose first entry
    is the step index advanced by one.
    """
    step_index, cell_states = states[0], states[1:]
    # NOTE(review): np.take is called without an explicit axis -- confirm that
    # this selects the intended time slice of `inputs`.
    step_data = np.squeeze(np.take(inputs, step_index), axis=0)
    step_out, next_states = self.cell(step_data, cell_states)
    return step_out, [step_index + 1] + next_states
def forward(self, tokens, token_types, valid_length, p_mask, start_position):
    """Compute start, end and answerable logits given a known start position.

    Parameters
    ----------
    tokens
        Shape (batch_size, sequence_length)
    token_types
        Shape (batch_size, sequence_length)
        Only passed to the backbone when ``self.use_segmentation`` is set.
    valid_length
        Shape (batch_size,)
    p_mask
        Shape (batch_size, sequence_length)
    start_position
        Shape (batch_size,)

    Returns
    -------
    start_logits
        Shape (batch_size, sequence_length)
    end_logits
        Shape (batch_size, sequence_length)
    answerable_logits
    """
    backbone_inputs = (tokens, token_types, valid_length) if self.use_segmentation \
        else (tokens, valid_length)
    contextual_embeddings = self.backbone(*backbone_inputs)

    start_logits = self.get_start_logits(contextual_embeddings, p_mask)

    # get_end_logits handles N candidates per sample; use N = 1 here and
    # remove that axis again afterwards.
    single_candidate = np.expand_dims(start_position, axis=1)
    end_logits = np.squeeze(
        self.get_end_logits(contextual_embeddings, single_candidate, p_mask), axis=1)

    answerable_logits = self.get_answerable_logits(contextual_embeddings, p_mask)
    return start_logits, end_logits, answerable_logits
def get_end_logits(self, contextual_embedding, start_positions, p_mask):
    """Compute end-position log-probabilities conditioned on start positions.

    Parameters
    ----------
    contextual_embedding
        Shape (batch_size, sequence_length, C)
    start_positions
        Shape (batch_size, N)
        Several start candidates are processed at once.
    p_mask
        Shape (batch_size, sequence_length)

    Returns
    -------
    end_logits
        Shape (batch_size, N, sequence_length)
    """
    # Features at the start positions, shape (batch_size, N, C)
    start_vectors = select_vectors_by_position(contextual_embedding, start_positions)

    context_4d = np.expand_dims(contextual_embedding, axis=1)  # (B, 1, T, C)
    start_4d = np.expand_dims(start_vectors, axis=2)  # (B, N, 1, C)

    # Broadcast both to (B, N, T, C) and pair every start candidate with
    # every position of the sequence.
    tiled_start = npx.broadcast_like(start_4d, context_4d, 2, 2)
    tiled_context = npx.broadcast_like(context_4d, start_4d, 1, 1)
    pair_features = np.concatenate([tiled_start, tiled_context], axis=-1)  # (B, N, T, 2C)

    pair_scores = np.squeeze(self.end_scores(pair_features), -1)
    return masked_logsoftmax(pair_scores,
                             mask=np.expand_dims(p_mask, axis=1),
                             axis=-1)
def forward(self, data, valid_length):  # pylint: disable=arguments-differ
    """
    Embed the primary ids and, when configured, the additional factor ids.

    With more than one factor and factor configs present, ``data`` is split along axis 2 into
    one id column per factor. Every factor is embedded with its own weight and merged into the
    primary embedding according to its ``combine`` setting (average, sum or concatenation).
    Dropout is applied when ``config.dropout`` is positive.

    :param data: Input ids; axis 2 holds the factors.
    :param valid_length: Valid lengths; returned as a copy.
    :return: The embedded data and a copy of ``valid_length``.
    :raises ValueError: If a factor config has an unknown ``combine`` value.
    """
    cfg = self.config
    with_factors = cfg.num_factors > 1 and cfg.factor_configs is not None

    averaged = []  # type: List[np.ndarray]
    concatenated = []  # type: List[np.ndarray]
    summed = []  # type: List[np.ndarray]

    if with_factors:
        buckets = {C.FACTORS_COMBINE_CONCAT: concatenated,
                   C.FACTORS_COMBINE_SUM: summed,
                   C.FACTORS_COMBINE_AVERAGE: averaged}
        columns = [np.squeeze(part, axis=2) for part in np.split(data, cfg.num_factors, axis=2)]
        data, factor_columns = columns[0], columns[1:]
        for idx, (factor_ids, factor_cfg) in enumerate(zip(factor_columns, cfg.factor_configs)):
            factor_embed = npx.embedding(factor_ids,
                                         input_dim=factor_cfg.vocab_size,
                                         weight=self.factor_weights[idx].data(),
                                         output_dim=factor_cfg.num_embed)
            bucket = buckets.get(factor_cfg.combine)
            if bucket is None:
                raise ValueError("Unknown combine value for factors: %s" % factor_cfg.combine)
            bucket.append(factor_embed)
    else:
        data = np.squeeze(data, axis=2)

    embed = npx.embedding(data,
                          weight=self.weight.data(),
                          input_dim=cfg.vocab_size,
                          output_dim=cfg.num_embed,
                          dtype=self._dtype,
                          sparse_grad=False)

    if with_factors:
        # Order matters: average first, then sum, then concatenate.
        if averaged:
            embed = npx.add_n(embed, *averaged) / (len(averaged) + 1)
        if summed:
            embed = npx.add_n(embed, *summed)
        if concatenated:
            embed = np.concatenate((embed, *concatenated), axis=2)

    if cfg.dropout > 0:
        embed = npx.dropout(data=embed, p=cfg.dropout)

    # See https://github.com/apache/incubator-mxnet/issues/14228
    return embed, np.copy(valid_length)
def forward(self, user_id, seq, item_id):
    """Score ``item_id`` for ``user_id`` given the interaction sequence ``seq``.

    Two optional convolution branches run over the embedded sequence: the
    ``conv_v`` branch (enabled when ``self.d_prime`` is truthy) and the
    ``conv_h`` + ``max_pool`` branch (enabled when ``self.d`` is truthy).
    Only the enabled branches are concatenated, so the layer also works when
    exactly one of them is switched off.

    Parameters
    ----------
    user_id
        User indices fed to ``self.P``.
    seq
        Item-index sequences fed to ``self.Q``.
        Presumably shape (batch_size, sequence_length) -- TODO confirm.
    item_id
        Target item indices fed to ``self.Q_prime`` and ``self.b``.

    Returns
    -------
    res
        One score per example (sum over axis 1 plus the item bias).

    Raises
    ------
    ValueError
        If both ``self.d_prime`` and ``self.d`` are falsy, i.e. there is no
        convolutional feature to feed into ``self.fc``.
    """
    # Insert a channel axis so the embedded sequence can be convolved.
    item_embs = np.expand_dims(self.Q(seq), 1)
    user_emb = self.P(user_id)

    conv_features = []

    # Branch built from ``conv_v``; flattened to (batch, fc1_dim_v).
    if self.d_prime:
        out_v = self.conv_v(item_embs)
        conv_features.append(out_v.reshape(out_v.shape[0], self.fc1_dim_v))

    # Branch built from ``conv_h``: each filter is followed by ReLU and its
    # paired max-pooling layer.
    if self.d:
        out_hs = []
        for conv, maxp in zip(self.conv_h, self.max_pool):
            conv_out = np.squeeze(npx.relu(conv(item_embs)), axis=3)
            out_hs.append(np.squeeze(maxp(conv_out), axis=2))
        conv_features.append(np.concatenate(out_hs, axis=1))

    if not conv_features:
        raise ValueError("At least one of d_prime and d must be non-zero.")
    # Concatenating a ``None`` placeholder would fail, so join only what exists.
    if len(conv_features) == 1:
        out = conv_features[0]
    else:
        out = np.concatenate(conv_features, axis=1)

    z = self.fc(self.dropout(out))
    # Combine the sequence representation with the user embedding.
    x = np.concatenate([z, user_emb], axis=1)
    # Score against the target item embedding and add the item bias.
    q_prime_i = np.squeeze(self.Q_prime(item_id))
    b = np.squeeze(self.b(item_id))
    res = (x * q_prime_i).sum(1) + b
    return res
def get_start_logits(self, contextual_embedding, p_mask):
    """Compute masked log-probabilities of the answer start position.

    Parameters
    ----------
    contextual_embedding
        Shape (batch_size, sequence_length, C)
    p_mask
        Mask forwarded to ``masked_logsoftmax``.
        Presumably shape (batch_size, sequence_length) -- TODO confirm.

    Returns
    -------
    start_logits
        Shape (batch_size, sequence_length)
    """
    raw_scores = self.start_scores(contextual_embedding)
    # Drop the trailing singleton axis before normalising over the sequence.
    return masked_logsoftmax(np.squeeze(raw_scores, -1), mask=p_mask, axis=-1)
def forward(self, queries, keys, values, valid_lens):
    """Additive attention: score with tanh(W_q q + W_k k), then weight ``values``.

    The attention weights are stored on ``self.attention_weights`` as a side
    effect. Dropout is applied to the weights before they are multiplied
    with ``values``.
    """
    projected_queries = self.W_q(queries)
    projected_keys = self.W_k(keys)

    # Queries become (`batch_size`, no. of queries, 1, `num_hiddens`) and keys
    # (`batch_size`, 1, no. of key-value pairs, `num_hiddens`); adding them
    # broadcasts over every query/key pair.
    pairwise = np.expand_dims(projected_queries, axis=2) + np.expand_dims(
        projected_keys, axis=1)

    # `self.w_v` has a single output, so its last axis is squeezed away.
    # Shape of `scores`: (`batch_size`, no. of queries, no. of key-value pairs)
    scores = np.squeeze(self.w_v(np.tanh(pairwise)), axis=-1)
    self.attention_weights = masked_softmax(scores, valid_lens)

    # Shape of `values`: (`batch_size`, no. of key-value pairs, value
    # dimension)
    weights = self.dropout(self.attention_weights)
    return npx.batch_dot(weights, values)
def forward(self, inputs):
    """Encode ``inputs`` with parallel convolutions and decode the result.

    Both embedding layers are applied to ``inputs`` and joined on the word
    vector axis; each convolution in ``self.convs`` is followed by
    ``self.pool``; the pooled features are concatenated, passed through
    dropout and finally through ``self.decoder``.
    """
    # Two embeddings of shape (batch size, no. of words, word vector
    # dimension), joined along the word vector axis
    trainable = self.embedding(inputs)
    frozen = self.constant_embedding(inputs)
    embeddings = np.concatenate((trainable, frozen), axis=2)

    # The one-dimensional convolutions expect the channel (word vector) axis
    # before the word axis
    embeddings = embeddings.transpose(0, 2, 1)

    # After pooling each convolution yields (batch size, channel size, 1);
    # squeeze the trailing axis and concatenate on the channel axis
    pooled = []
    for conv in self.convs:
        pooled.append(np.squeeze(self.pool(conv(embeddings)), axis=-1))
    encoding = np.concatenate(pooled, axis=1)

    # Dropout, then the fully connected output layer
    return self.decoder(self.dropout(encoding))
def test_np_squeeze():
    """Compare ``np.squeeze`` against the reference ``_np.squeeze``.

    For every (shape, axis) case the imperative result, the result of a block
    run both non-hybridized and hybridized, and the gradient w.r.t. the input
    (expected to be all ones) are checked.
    """

    class TestSqueeze(HybridBlock):
        def __init__(self, axis):
            super(TestSqueeze, self).__init__()
            self._axis = axis

        def hybrid_forward(self, F, x):
            return F.np.squeeze(x, axis=self._axis)

    cases = [
        ((), None),
        ((), -1),
        ((), 0),
        ((4, 1, 2), None),
        ((1, 1, 1), None),
        ((1, 0, 1, 5), 2),
        ((1, 0, 1, 1), (-1, -4)),
    ]
    tolerance = dict(rtol=1e-5, atol=1e-6, use_broadcast=False)

    for shape, axis in cases:
        data_np = _np.random.uniform(size=shape)
        data_mx = np.array(data_np, dtype=data_np.dtype)
        expected = _np.squeeze(data_np, axis=axis)

        # Imperative call
        assert_almost_equal(np.squeeze(data_mx, axis=axis).asnumpy(), expected, **tolerance)

        # Block call, first as-is and then hybridized
        net = TestSqueeze(axis)
        for use_hybrid in (False, True):
            if use_hybrid:
                net.hybridize()
            data_mx.attach_grad()
            with mx.autograd.record():
                actual = net(data_mx)
            assert_almost_equal(actual.asnumpy(), expected, **tolerance)
            actual.backward()
            assert_almost_equal(data_mx.grad.asnumpy(), _np.ones_like(data_np), **tolerance)
def multibox_detection(cls_probs, offset_preds, anchors, nms_threshold=0.5,
                       pos_threshold=0.00999999978):
    """Turn class probabilities and offsets into per-anchor detections.

    For every sample, the most likely class among rows ``1:`` of the class
    probabilities is picked for each anchor, boxes are decoded with
    ``offset_inverse``, and ``nms`` selects the boxes to keep. Anchors that
    are not kept, or whose confidence is below ``pos_threshold``, get class
    id -1.

    Parameters
    ----------
    cls_probs
        Indexed as (batch_size, num_classes, num_anchors); row 0 of the class
        axis is skipped (presumably the background class -- TODO confirm).
    offset_preds
        Per-sample offsets, reshaped to (-1, 4) for each sample.
    anchors
        Anchor boxes with a leading axis of size 1, which is squeezed away.
    nms_threshold
        Threshold passed to ``nms``.
    pos_threshold
        Minimum confidence for a prediction to count as positive.

    Returns
    -------
    Stacked per-sample arrays whose columns are the class id, the
    confidence and the predicted box.
    """
    device, batch_size = cls_probs.ctx, cls_probs.shape[0]
    anchors = np.squeeze(anchors, axis=0)
    num_anchors = cls_probs.shape[2]
    out = []
    for i in range(batch_size):
        cls_prob, offset_pred = cls_probs[i], offset_preds[i].reshape(-1, 4)
        conf, class_id = np.max(cls_prob[1:], 0), np.argmax(cls_prob[1:], 0)
        predicted_bb = offset_inverse(anchors, offset_pred)
        # Honour the caller-supplied threshold instead of a hard-coded 0.5.
        keep = nms(predicted_bb, conf, nms_threshold)
        # Find all non_keep indices and set the class_id to background:
        # indices occurring once in (keep + all indices) were not kept.
        all_idx = np.arange(num_anchors, dtype=np.int32, ctx=device)
        combined = np.concatenate((keep, all_idx))
        unique, counts = np.unique(combined, return_counts=True)
        non_keep = unique[counts == 1]
        # Kept boxes come first, followed by the suppressed ones.
        all_id_sorted = np.concatenate((keep, non_keep))
        class_id[non_keep] = -1
        class_id = class_id[all_id_sorted].astype('float32')
        conf, predicted_bb = conf[all_id_sorted], predicted_bb[all_id_sorted]
        # threshold to be a positive prediction
        below_min_idx = (conf < pos_threshold)
        class_id[below_min_idx] = -1
        conf[below_min_idx] = 1 - conf[below_min_idx]
        pred_info = np.concatenate((np.expand_dims(class_id, axis=1),
                                    np.expand_dims(conf, axis=1),
                                    predicted_bb), axis=1)
        out.append(pred_info)
    return np.stack(out)
def forward(self, source_encoded: np.ndarray, source_encoded_length: np.ndarray) -> np.ndarray:
    """
    Transformation to the length ratio. Returns a vector.

    The encoder states are masked beyond each sequence's length, averaged over the valid
    positions and passed through ``self.layers``.

    :param source_encoded: Encoder representation for n elements.
                           Shape: (n, source_encoded_length, hidden_size).
    :param source_encoded_length: A vector of encoded sequence lengths. Shape: (n,).
    :return: Predictions of the ratio length(hypothesis)/length(reference). Shape: (n,).
    """
    # source_masked: (n, source_encoded_length, hidden_size)
    source_masked = npx.sequence_mask(source_encoded,
                                      axis=1,
                                      sequence_length=source_encoded_length,
                                      use_sequence_length=True,
                                      value=0.)
    # calculate the proper means of encoded sources
    # data: (n, hidden_size)
    data = np.sum(source_masked, axis=1, keepdims=False) / np.reshape(source_encoded_length, (-1, 1))
    # MLP. Shape: (n, 1)
    data = self.layers(data)
    # Shape: (n,). Squeeze only the output axis: an axis-less squeeze would also
    # drop the batch axis when n == 1 and return a 0-d array.
    return np.squeeze(data, axis=1)
def forward(self,
            source: np.ndarray,
            source_length: np.ndarray,
            restrict_lexicon: Optional[lexicon.TopKLexicon],
            raw_constraint_list: List[Optional[constrained.RawConstraintList]],
            raw_avoid_list: List[Optional[constrained.RawConstraintList]],
            max_output_lengths: np.ndarray) -> Tuple[np.ndarray,
                                                     np.ndarray,
                                                     np.ndarray,
                                                     np.ndarray,
                                                     List[Optional[np.ndarray]],
                                                     List[Optional[constrained.ConstrainedHypothesis]]]:
    """
    Translates multiple sentences using beam search.

    All per-hypothesis arrays are laid out as (batch_size * beam_size, ...) blocks: a full beam for
    the first sentence, followed by the beam of the next sentence, and so on. Scores are treated as
    costs: smaller is better and np.inf blocks an entry.

    :param source: Source ids. Shape: (batch_size, bucket_key, num_factors).
    :param source_length: Valid source lengths. Shape: (batch_size,).
    :param restrict_lexicon: Lexicon to use for vocabulary restriction.
    :param raw_constraint_list: A list of optional lists containing phrases (as lists of target word IDs)
           that must appear in each output.
    :param raw_avoid_list: A list of optional lists containing phrases (as lists of target word IDs)
           that must NOT appear in each output.
    :param max_output_lengths: ndarray of maximum output lengths per input in source.
            Shape: (batch_size,). Dtype: int32.
    :return List of best hypotheses indices, list of best word indices,
            array of accumulated length-normalized negative log-probs, hypotheses lengths,
            predicted lengths of references (if any), constraints (if any).
    """
    batch_size = source.shape[0]
    logger.debug("beam_search batch size: %d", batch_size)

    # Maximum beam search iterations (determined by longest input with eos)
    max_iterations = max_output_lengths.max().item()
    logger.debug("max beam search iterations: %d", max_iterations)

    # Only needed when sampling: one index per row of the (batch * beam) layout
    sample_best_hyp_indices = None
    if self._sample is not None:
        utils.check_condition(restrict_lexicon is None,
                              "Sampling is not available when working with a restricted lexicon.")
        sample_best_hyp_indices = np.arange(0, batch_size * self.beam_size, dtype='int32', ctx=self.context)

    # General data structure: batch_size * beam_size blocks in total;
    # a full beam for each sentence, followed by the next beam-block for the next sentence and so on

    # best word_indices (also act as input): (batch*beam, num_target_factors)
    best_word_indices = np.full((batch_size * self.beam_size, self.num_target_factors),
                                fill_value=self.bos_id, ctx=self.context, dtype='int32')

    # offset for hypothesis indices in batch decoding
    offset = np.repeat(np.arange(0, batch_size * self.beam_size, self.beam_size,
                                 dtype='int32', ctx=self.context), self.beam_size)

    # locations of each batch item when first dimension is (batch * beam)
    batch_indices = np.arange(0, batch_size * self.beam_size, self.beam_size, dtype='int32', ctx=self.context)
    # inf everywhere except the first row of every beam; added to the scores at t == 1 (see step 3)
    first_step_mask = np.full((batch_size * self.beam_size, 1),
                              fill_value=np.inf, ctx=self.context, dtype=self.dtype)
    first_step_mask[batch_indices] = 0.0

    # Best word and hypotheses indices across beam search steps from topk operation.
    best_hyp_indices_list = []  # type: List[np.ndarray]
    best_word_indices_list = []  # type: List[np.ndarray]

    lengths = np.zeros((batch_size * self.beam_size, 1), ctx=self.context, dtype='int32')
    finished = np.zeros((batch_size * self.beam_size, 1), ctx=self.context, dtype='int32')

    # Extending max_output_lengths to shape (batch_size * beam_size, 1)
    max_output_lengths = np.repeat(np.expand_dims(max_output_lengths, axis=1), self.beam_size, axis=0)

    # scores_accumulated: chosen smallest scores in scores (ascending).
    scores_accumulated = np.zeros((batch_size * self.beam_size, 1), ctx=self.context, dtype=self.dtype)

    output_vocab_size = self.output_vocab_size

    # If using a top-k lexicon, select param rows for logit computation that correspond to the
    # target vocab for this sentence.
    vocab_slice_ids = None  # type: Optional[np.ndarray]
    if restrict_lexicon:
        # Only the first (primary) source factor is used for the lexicon lookup
        source_words = np.squeeze(np.split(source, self.num_source_factors, axis=2)[0], axis=2)
        vocab_slice_ids, output_vocab_size, raw_constraint_list = _get_vocab_slice_ids(restrict_lexicon,
                                                                                       source_words,
                                                                                       raw_constraint_list,
                                                                                       self.eos_id, beam_size=1)

    # Constant cost rows handed to _update_scores below
    pad_dist = np.full((batch_size * self.beam_size, output_vocab_size - 1),
                       fill_value=np.inf, ctx=self.context, dtype=self.dtype)
    eos_dist = np.full((batch_size * self.beam_size, output_vocab_size),
                       fill_value=np.inf, ctx=self.context, dtype=self.dtype)
    eos_dist[:, C.EOS_ID] = 0
    unk_dist = None
    if self.prevent_unk:
        unk_dist = np.zeros_like(eos_dist)
        unk_dist[:, C.UNK_ID] = np.inf  # pylint: disable=E1137

    # Initialize the beam to track constraint sets, where target-side lexical constraints are present
    constraints = constrained.init_batch(raw_constraint_list, self.beam_size, self.bos_id, self.eos_id)

    # avoid_states only exists when this condition holds; the loop below re-checks the same condition
    if self.global_avoid_trie or any(raw_avoid_list):
        avoid_states = constrained.AvoidBatch(batch_size, self.beam_size,
                                              avoid_list=raw_avoid_list,
                                              global_avoid_trie=self.global_avoid_trie)
        avoid_states.consume(best_word_indices[:, 0])  # constraints operate only on primary target factor

    # (0) encode source sentence, returns a list
    model_states, estimated_reference_lengths = self._inference.encode_and_initialize(source, source_length)
    # repeat states to beam_size
    model_states = _repeat_states(model_states, self.beam_size, self._inference.state_structure())
    # repeat estimated_reference_lengths to shape (batch_size * beam_size, 1)
    estimated_reference_lengths = np.repeat(estimated_reference_lengths, self.beam_size, axis=0)

    # Records items in the beam that are inactive. At the beginning (t==1), there is only one valid or active
    # item on the beam for each sentence
    inactive = np.zeros((batch_size * self.beam_size, 1), dtype='int32', ctx=self.context)
    # Pre-set t so the debug message after the loop works even if the loop body never runs
    t = 1
    for t in range(1, max_iterations + 1):  # max_iterations + 1 required to get correct results
        # (1) obtain next predictions and advance models' state
        # target_dists: (batch_size * beam_size, target_vocab_size)
        target_dists, model_states, target_factors = self._inference.decode_step(best_word_indices,
                                                                                 model_states,
                                                                                 vocab_slice_ids)

        # (2) Produces the accumulated cost of target words in each row.
        # There is special treatment for finished and inactive rows: inactive rows are inf everywhere;
        # finished rows are inf everywhere except column zero, which holds the accumulated model score
        scores, lengths = self._update_scores(target_dists,
                                              finished,
                                              inactive,
                                              scores_accumulated,
                                              lengths,
                                              max_output_lengths,
                                              unk_dist,
                                              pad_dist,
                                              eos_dist)

        # Mark entries that should be blocked as having a score of np.inf
        if self.global_avoid_trie or any(raw_avoid_list):
            block_indices = avoid_states.avoid()
            if len(block_indices) > 0:
                scores[block_indices] = np.inf
                if self._sample is not None:
                    target_dists[block_indices] = np.inf

        # (3) Get beam_size winning hypotheses for each sentence block separately. Only look as
        # far as the active beam size for each sentence.
        if self._sample is not None:
            best_hyp_indices, best_word_indices, scores_accumulated = self._sample(scores,
                                                                                   target_dists,
                                                                                   finished,
                                                                                   sample_best_hyp_indices)
        else:
            # On the first timestep, all hypotheses have identical histories, so force topk() to choose extensions
            # of the first row only by setting all other rows to inf
            if t == 1:
                scores += first_step_mask

            best_hyp_indices, best_word_indices, scores_accumulated = self._top(scores, offset)

        # Constraints for constrained decoding are processed sentence by sentence
        if any(raw_constraint_list):
            best_hyp_indices, best_word_indices, scores_accumulated, constraints, inactive = constrained.topk(
                t,
                batch_size,
                self.beam_size,
                inactive,
                scores,
                constraints,
                best_hyp_indices,
                best_word_indices,
                scores_accumulated)

        # Map from restricted to full vocab ids if needed
        if restrict_lexicon:
            best_word_indices = np.take(vocab_slice_ids, best_word_indices, axis=0)

        # (4) Normalize the scores of newly finished hypotheses. Note that after this until the
        # next call to topk(), hypotheses may not be in sorted order.
        _sort_inputs = [best_hyp_indices, best_word_indices, finished, scores_accumulated, lengths,
                        estimated_reference_lengths]
        if target_factors is not None:
            _sort_inputs.append(target_factors)
        best_word_indices, finished, scores_accumulated, lengths, estimated_reference_lengths = \
            self._sort_norm_and_update_finished(*_sort_inputs)

        # Collect best hypotheses, best word indices
        best_word_indices_list.append(best_word_indices)
        best_hyp_indices_list.append(best_hyp_indices)

        if self._should_stop(finished, batch_size):
            break

        # (5) update models' state with winning hypotheses (ascending)
        model_states = self._sort_states(best_hyp_indices, *model_states)

    logger.debug("Finished after %d out of %d steps.", t, max_iterations)

    # (9) Sort the hypotheses within each sentence (normalization for finished hyps may have unsorted them).
    scores_accumulated_shape = scores_accumulated.shape
    # Fold to one row per sentence so argsort ranks hypotheses within each beam
    folded_accumulated_scores = scores_accumulated.reshape((batch_size, -1))
    indices = np.argsort(folded_accumulated_scores.astype('float32', copy=False), axis=1).reshape((-1,))
    # Turn the within-beam ranks back into row indices of the (batch * beam) layout
    best_hyp_indices = np.unravel_index(indices, scores_accumulated_shape)[0].astype('int32') + offset
    scores_accumulated = scores_accumulated.take(best_hyp_indices, axis=0)
    # One more entry of hypothesis indices than of word indices: this final re-ordering
    best_hyp_indices_list.append(best_hyp_indices)
    lengths = lengths.take(best_hyp_indices, axis=0)
    all_best_hyp_indices = np.stack(best_hyp_indices_list, axis=1)
    all_best_word_indices = np.stack(best_word_indices_list, axis=2)
    constraints = [constraints[x] for x in best_hyp_indices.tolist()]

    return all_best_hyp_indices, \
           all_best_word_indices, \
           scores_accumulated, \
           lengths.astype('int32', copy=False), \
           estimated_reference_lengths, \
           constraints
def forward(self,
            source: np.ndarray,
            source_length: np.ndarray,
            restrict_lexicon: Optional[lexicon.TopKLexicon],
            raw_constraint_list: List[Optional[constrained.RawConstraintList]],
            raw_avoid_list: List[Optional[constrained.RawConstraintList]],
            max_output_lengths: np.ndarray) -> Tuple[np.ndarray,
                                                     np.ndarray,
                                                     np.ndarray,
                                                     np.ndarray,
                                                     List[Optional[np.ndarray]],
                                                     List[Optional[constrained.ConstrainedHypothesis]]]:
    """
    Translates a single sentence (batch_size=1) using greedy search.

    Decoding stops as soon as the chosen word equals the EOS or PAD id, or after the maximum
    number of iterations. The returned score is a placeholder of -1.

    :param source: Source ids. Shape: (batch_size=1, bucket_key, num_factors).
    :param source_length: Valid source lengths. Shape: (batch_size=1,).
    :param restrict_lexicon: Lexicon to use for vocabulary restriction.
    :param raw_constraint_list: A list of optional lists containing phrases (as lists of target word IDs)
           that must appear in each output.
    :param raw_avoid_list: A list of optional lists containing phrases (as lists of target word IDs)
           that must NOT appear in each output.
    :param max_output_lengths: ndarray of maximum output lengths per input in source.
            Shape: (batch_size=1,). Dtype: int32.
    :return List of best hypotheses indices, list of best word indices,
            array of accumulated length-normalized negative log-probs, hypotheses lengths,
            predicted lengths of references (if any), constraints (if any).
    """
    batch_size = source.shape[0]
    assert batch_size == 1, "Greedy Search does not support batch_size != 1"

    # Upper bound on decoding steps (determined by longest input with eos)
    max_iterations = max_output_lengths.max().item()
    logger.debug("max greedy search iterations: %d", max_iterations)

    # Decoder input for the first step: (batch*beam, num_target_factors) filled with BOS
    current_word = np.full((batch_size, self.num_target_factors),
                           fill_value=self.bos_id, ctx=self.context, dtype='int32')

    # With a top-k lexicon, restrict logit computation to the rows of the
    # target vocab relevant for this sentence.
    vocab_slice_ids = None  # type: Optional[np.ndarray]
    if restrict_lexicon:
        primary_factor = np.split(source, self.num_source_factors, axis=2)[0]
        source_words = np.squeeze(primary_factor, axis=2)
        vocab_slice_ids, _, raw_constraint_list = _get_vocab_slice_ids(restrict_lexicon,
                                                                       source_words,
                                                                       raw_constraint_list,
                                                                       self.eos_id,
                                                                       beam_size=1)

    # (0) encode source sentence, returns a list
    # TODO: check for disabled predicted output length
    model_states, _ = self._inference.encode_and_initialize(source, source_length)

    chosen_words = []  # type: List[np.ndarray]
    step = 1
    for step in range(1, max_iterations + 1):
        scores, model_states, target_factors = self._inference.decode_step(current_word,
                                                                           model_states,
                                                                           vocab_slice_ids=vocab_slice_ids)
        # shape: (batch*beam=1, 1)
        current_word = self.work_block(scores, vocab_slice_ids, target_factors)
        chosen_words.append(current_word)

        if current_word == self.eos_id or current_word == C.PAD_ID:
            break

    logger.debug("Finished after %d out of %d steps.", step, max_iterations)

    # shape: (1, num_factors, length)
    stacked_outputs = np.stack(chosen_words, axis=2)
    length = np.array([step], dtype='int32')  # shape (1,)
    hyp_indices = np.zeros((1, step + 1), dtype='int32')
    score = np.array([-1.])  # TODO: return unnormalized proper score

    return hyp_indices, stacked_outputs, score, length, None, []  # type: ignore