def match_anchor_to_bbox(ground_truth, anchors, device, iou_threshold=0.5):
    """Assign ground-truth bounding boxes to anchor boxes similar to them."""
    num_anchors, num_gt_boxes = anchors.shape[0], ground_truth.shape[0]

    # Element `x_ij` in the `i`-th row and `j`-th column is the IoU of
    # anchor box `anc_i` and ground-truth bounding box `box_j`
    jaccard = box_iou(anchors, ground_truth)
    # Initialize the tensor to hold assigned ground truth bbox for each anchor
    anchors_bbox_map = np.full((num_anchors, ),
                               fill_value=-1,
                               dtype=np.int32,
                               ctx=device)
    # Assign ground truth bounding box according to the threshold
    max_ious, indices = np.max(jaccard, axis=1), np.argmax(jaccard, axis=1)

    anc_i = np.nonzero(max_ious >= iou_threshold)[0]
    box_j = indices[max_ious >= iou_threshold]
    anchors_bbox_map[anc_i] = box_j

    # Greedily force-assign each ground-truth box to its highest-IoU anchor,
    # then discard that anchor's row and the box's column
    col_discard = np.full((num_anchors, ), -1)
    row_discard = np.full((num_gt_boxes, ), -1)

    for _ in range(num_gt_boxes):
        max_idx = np.argmax(jaccard)
        box_idx = (max_idx % num_gt_boxes).astype('int32')
        anc_idx = (max_idx // num_gt_boxes).astype('int32')
        anchors_bbox_map[anc_idx] = box_idx
        jaccard[:, box_idx] = col_discard
        jaccard[anc_idx, :] = row_discard

    return anchors_bbox_map
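
A minimal pure-NumPy sketch of the inputs and the threshold step above; `iou_matrix` is a hypothetical stand-in for the `box_iou` dependency, and plain numpy replaces the MXNet arrays and `ctx`/`device` handling:

import numpy as onp

def iou_matrix(a, b):
    """IoU of every box in `a` against every box in `b` (corner format)."""
    tl = onp.maximum(a[:, None, :2], b[None, :, :2])  # intersection top-left
    br = onp.minimum(a[:, None, 2:], b[None, :, 2:])  # intersection bottom-right
    inter = onp.prod(onp.clip(br - tl, 0, None), axis=2)
    area_a = onp.prod(a[:, 2:] - a[:, :2], axis=1)
    area_b = onp.prod(b[:, 2:] - b[:, :2], axis=1)
    return inter / (area_a[:, None] + area_b[None, :] - inter)

anchors = onp.array([[0., 0., 1., 1.], [0., 0., .5, .5], [2., 2., 3., 3.]])
gt = onp.array([[0., 0., .9, .9]])
jaccard = iou_matrix(anchors, gt)  # shape (num_anchors, num_gt_boxes)
# The thresholding step of the function above, in one line:
assigned = onp.where(jaccard.max(axis=1) >= 0.5, jaccard.argmax(axis=1), -1)
print(assigned)  # [0 -1 -1]: only anchor 0 clears the 0.5 IoU threshold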
Example #2
def test_full():
    data1 = np.full((INT_OVERFLOW, 2), np.array([1, 2]))
    assert data1.shape == (INT_OVERFLOW, 2)
    assert data1[-1, 0] == 1 and data1[-1, 1] == 2
    data2 = np.full((2, INT_OVERFLOW), 3)
    assert data2.shape == (2, INT_OVERFLOW)
    assert data2[-1, -1] == 3
Example #3
def test_to_tensor():
    # 3D Input
    data_in = np.random.uniform(0, 255, (300, 300, 3)).astype(dtype=np.uint8)
    out_nd = transforms.ToTensor()(np.array(data_in, dtype='uint8'))
    assert_almost_equal(
        out_nd.asnumpy(),
        np.transpose(data_in.astype(dtype=np.float32) / 255.0, (2, 0, 1)))

    # 4D Input
    data_in = np.random.uniform(0, 255,
                                (5, 300, 300, 3)).astype(dtype=np.uint8)
    out_nd = transforms.ToTensor()(np.array(data_in, dtype='uint8'))
    assert_almost_equal(
        out_nd.asnumpy(),
        np.transpose(data_in.astype(dtype=np.float32) / 255.0, (0, 3, 1, 2)))

    # Invalid Input
    invalid_data_in = np.random.uniform(
        0, 255, (5, 5, 300, 300, 3)).astype(dtype=np.uint8)
    transformer = transforms.ToTensor()
    assertRaises(MXNetError, transformer, invalid_data_in)

    # Bounds (0->0, 255->1)
    data_in = np.zeros((10, 20, 3)).astype(dtype=np.uint8)
    out_nd = transforms.ToTensor()(np.array(data_in, dtype='uint8'))
    assert same(
        out_nd.asnumpy(),
        np.transpose(np.zeros(data_in.shape, dtype=np.float32), (2, 0, 1)))

    data_in = np.full((10, 20, 3), 255).astype(dtype=np.uint8)
    out_nd = transforms.ToTensor()(np.array(data_in, dtype='uint8'))
    assert same(
        out_nd.asnumpy(),
        np.transpose(np.ones(data_in.shape, dtype=np.float32), (2, 0, 1)))
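
The assertions above pin down ToTensor's contract; as a rough equivalent (a sketch in plain numpy, not Gluon's actual kernel), the 3D case is:

import numpy as onp

hwc = onp.random.randint(0, 256, size=(300, 300, 3), dtype=onp.uint8)
chw = onp.transpose(hwc.astype(onp.float32) / 255.0, (2, 0, 1))  # HWC -> CHW, [0, 255] -> [0, 1]
assert chw.shape == (3, 300, 300)
assert chw.min() >= 0.0 and chw.max() <= 1.0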
Example #4
def _get_vocab_slice_ids(restrict_lexicon: Optional[lexicon.TopKLexicon],
                         source_words: np.ndarray,
                         raw_constraint_list: List[Optional[constrained.RawConstraintList]],
                         eos_id: int,
                         beam_size: int) -> Tuple[np.ndarray, int, List[Optional[constrained.RawConstraintList]]]:
    vocab_slice_ids = np.array(restrict_lexicon.get_trg_ids(source_words.astype("int32", copy=False).asnumpy()), dtype='int32')
    ctx = source_words.ctx
    if any(raw_constraint_list):
        # Add the constraint IDs to the list of permissible IDs, and then project them into the reduced space
        constraint_ids = np.array([word_id for sent in raw_constraint_list for phr in sent for word_id in phr])
        vocab_slice_ids = onp.lib.arraysetops.union1d(vocab_slice_ids, constraint_ids)  # type: ignore
        full_to_reduced = dict((val, i) for i, val in enumerate(vocab_slice_ids))
        raw_constraint_list = [[[full_to_reduced[x] for x in phr] for phr in sent] for sent in
                               raw_constraint_list]
    # pad to a multiple of 8.
    vocab_slice_ids = np.pad(vocab_slice_ids, (0, 7 - ((len(vocab_slice_ids) - 1) % 8)),
                             mode='constant', constant_values=eos_id)

    vocab_slice_ids_shape = vocab_slice_ids.shape[0]
    if vocab_slice_ids_shape < beam_size + 1:
        # This fixes an edge case for toy models, where the number of vocab ids from the lexicon is
        # smaller than the beam size.
        logger.warning("Padding vocab_slice_ids (%d) with EOS to have at least %d+1 elements to expand",
                       vocab_slice_ids_shape, beam_size)
        n = beam_size - vocab_slice_ids_shape + 1
        vocab_slice_ids = np.concatenate((vocab_slice_ids, np.full((n,), fill_value=eos_id, ctx=ctx, dtype='int32')),
                                         axis=0)

    logger.debug(f'decoder softmax size: {vocab_slice_ids_shape}')
    return vocab_slice_ids, vocab_slice_ids_shape, raw_constraint_list
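
The pad width `7 - ((len - 1) % 8)` rounds the id list up to the next multiple of 8 (and leaves exact multiples untouched), which keeps the softmax shape hardware-friendly. A quick check of that arithmetic:

for n in range(1, 33):
    pad = 7 - ((n - 1) % 8)
    assert (n + pad) % 8 == 0 and 0 <= pad < 8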
Example #5
def test_prevent_unk_update_scores():
    pytest.importorskip("mxnet")
    from mxnet import np
    import sockeye.beam_search

    vocab_size = 10
    batch_beam_size = 3
    us = sockeye.beam_search.UpdateScores()
    pad_dist = np.full((batch_beam_size, vocab_size - 1),
                       fill_value=np.inf,
                       dtype='float32')
    eos_dist = np.full((batch_beam_size, vocab_size),
                       fill_value=np.inf,
                       dtype='float32')
    eos_dist[:, C.EOS_ID] = 0
    unk_dist = np.zeros_like(eos_dist)
    unk_dist[:, C.UNK_ID] = np.inf  # pylint: disable=E1137

    lengths = np.array([[0], [1], [0]], dtype='int32')
    max_lengths = np.array([[1], [2], [3]],
                           dtype='int32')  # first one reaches max length
    scores_accumulated = np.ones((3, 1), dtype='float32')
    finished = np.array(
        [
            [0],  # not finished
            [1],  # finished
            [0],  # not finished
        ],
        dtype='int32')
    inactive = np.zeros_like(finished)
    target_dists = np.random.uniform(0, 1, (3, vocab_size))

    scores, lengths = us(target_dists, finished, inactive, scores_accumulated,
                         lengths, max_lengths, unk_dist, pad_dist, eos_dist)
    lengths = lengths.reshape((-1, ))

    assert (lengths == np.array(
        [[1], [1], [1]])).all()  # lengths of unfinished rows incremented to 1
    assert (scores[0] == (1. + target_dists[0] +
                          eos_dist)).all()  # row 0 reached max length: force eos
    assert (scores[1] == np.array([1.] + pad_dist[1].tolist())
            ).all()  # row 1 is finished: force pad, keep accumulated score
    assert scores[2, C.UNK_ID] == np.inf  # row 2: <unk> must score np.inf
    assert (scores[2] == (1. + target_dists[2] +
                          unk_dist[2])).all()  # row 2: step scores + previous score
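
The three assertions encode three masking rules; here they are for a single hypothesis row in plain numpy (a sketch of the semantics only, not Sockeye's hybridized UpdateScores block; EOS_ID and UNK_ID are made-up ids):

import numpy as onp

EOS_ID, UNK_ID, vocab = 3, 1, 10
acc = 1.0                                 # accumulated score so far
step = onp.random.uniform(0, 1, vocab)    # this step's word scores

# Active row: add this step's scores and block <unk>.
active = acc + step
active[UNK_ID] = onp.inf
# Row that hit max length: everything inf except a forced <eos>.
forced_eos = onp.full(vocab, onp.inf)
forced_eos[EOS_ID] = acc + step[EOS_ID]
# Finished row: keep the accumulated score in column 0, inf elsewhere.
finished = onp.full(vocab, onp.inf)
finished[0] = acc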
Example #6
def test_softmax():
    input_data = np.ones((SMALL_Y, LARGE_X))
    for axis in [0, 1]:
        true_output = np.full((SMALL_Y, LARGE_X), (1 / input_data.shape[axis]))
        output = npx.softmax(input_data, axis=axis)
        assert_almost_equal(output.asnumpy(),
                            true_output,
                            rtol=1e-5,
                            atol=1e-5)
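
Sanity check of the expected output in plain numpy: softmax over a constant input is uniform, i.e. 1/n along the reduced axis:

import numpy as onp

x = onp.ones((4, 6))
e = onp.exp(x - x.max(axis=1, keepdims=True))
assert onp.allclose(e / e.sum(axis=1, keepdims=True), 1 / 6)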
Example #7
def test_power():
    A = np.full((2, INT_OVERFLOW), 2)
    B = np.ones((2, INT_OVERFLOW))
    B[-1, -1] = 3
    A.attach_grad()
    B.attach_grad()
    with mx.autograd.record():
        C = np.power(A, B)
        C.backward()
    assert C.shape == A.shape
    assert C[-1, -1] == 8
    assert A.grad.shape == A.shape
    assert A.grad[-1, -1] == 12
    assert B.grad.shape == B.shape
    assert_almost_equal(B.grad[-1, -1], 2**3 * np.log(2), rtol=1e-5, atol=1e-5)
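
The expected gradients follow from d/dA A**B = B * A**(B-1) and d/dB A**B = A**B * ln(A); a quick finite-difference check at A=2, B=3 with plain floats:

import math

a, b, eps = 2.0, 3.0, 1e-6
dA = ((a + eps) ** b - (a - eps) ** b) / (2 * eps)
dB = (a ** (b + eps) - a ** (b - eps)) / (2 * eps)
assert abs(dA - b * a ** (b - 1)) < 1e-4      # 12
assert abs(dB - a ** b * math.log(a)) < 1e-4  # 8 * ln 2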
Example #8
    def forward(self,
                source: np.ndarray,
                source_length: np.ndarray,
                restrict_lexicon: Optional[lexicon.TopKLexicon],
                raw_constraint_list: List[Optional[constrained.RawConstraintList]],
                raw_avoid_list: List[Optional[constrained.RawConstraintList]],
                max_output_lengths: np.ndarray) -> Tuple[np.ndarray,
                                                         np.ndarray,
                                                         np.ndarray,
                                                         np.ndarray,
                                                         List[Optional[np.ndarray]],
                                                         List[Optional[constrained.ConstrainedHypothesis]]]:
        """
        Translates multiple sentences using beam search.

        :param source: Source ids. Shape: (batch_size, bucket_key, num_factors).
        :param source_length: Valid source lengths. Shape: (batch_size,).
        :param restrict_lexicon: Lexicon to use for vocabulary restriction.
        :param raw_constraint_list: A list of optional lists containing phrases (as lists of target word IDs)
               that must appear in each output.
        :param raw_avoid_list: A list of optional lists containing phrases (as lists of target word IDs)
               that must NOT appear in each output.
        :param max_output_lengths: ndarray of maximum output lengths per input in source.
                Shape: (batch_size,). Dtype: int32.
        :return List of best hypotheses indices, list of best word indices,
                array of accumulated length-normalized negative log-probs, hypotheses lengths,
                predicted lengths of references (if any), constraints (if any).
        """
        batch_size = source.shape[0]
        logger.debug("beam_search batch size: %d", batch_size)

        # Maximum beam search iterations (determined by longest input with eos)
        max_iterations = max_output_lengths.max().item()
        logger.debug("max beam search iterations: %d", max_iterations)

        sample_best_hyp_indices = None
        if self._sample is not None:
            utils.check_condition(restrict_lexicon is None,
                                  "Sampling is not available when working with a restricted lexicon.")
            sample_best_hyp_indices = np.arange(0, batch_size * self.beam_size, dtype='int32', ctx=self.context)

        # General data structure: batch_size * beam_size blocks in total;
        # a full beam for each sentence, followed by the next beam-block for the next sentence and so on

        # best word indices (also act as input), shape: (batch * beam, num_target_factors)
        best_word_indices = np.full((batch_size * self.beam_size, self.num_target_factors),
                                    fill_value=self.bos_id, ctx=self.context, dtype='int32')

        # offset for hypothesis indices in batch decoding
        offset = np.repeat(np.arange(0, batch_size * self.beam_size, self.beam_size,
                                     dtype='int32', ctx=self.context), self.beam_size)

        # locations of each batch item when first dimension is (batch * beam)
        batch_indices = np.arange(0, batch_size * self.beam_size, self.beam_size, dtype='int32', ctx=self.context)
        first_step_mask = np.full((batch_size * self.beam_size, 1),
                                  fill_value=np.inf, ctx=self.context, dtype=self.dtype)
        first_step_mask[batch_indices] = 0.0

        # Best word and hypotheses indices across beam search steps from topk operation.
        best_hyp_indices_list = []  # type: List[np.ndarray]
        best_word_indices_list = []  # type: List[np.ndarray]

        lengths = np.zeros((batch_size * self.beam_size, 1), ctx=self.context, dtype='int32')
        finished = np.zeros((batch_size * self.beam_size, 1), ctx=self.context, dtype='int32')

        # Extending max_output_lengths to shape (batch_size * beam_size, 1)
        max_output_lengths = np.repeat(np.expand_dims(max_output_lengths, axis=1), self.beam_size, axis=0)

        # scores_accumulated: chosen smallest scores in scores (ascending).
        scores_accumulated = np.zeros((batch_size * self.beam_size, 1), ctx=self.context, dtype=self.dtype)

        output_vocab_size = self.output_vocab_size

        # If using a top-k lexicon, select param rows for logit computation that correspond to the
        # target vocab for this sentence.
        vocab_slice_ids = None  # type: Optional[np.ndarray]
        if restrict_lexicon:
            source_words = np.squeeze(np.split(source, self.num_source_factors, axis=2)[0], axis=2)
            vocab_slice_ids, output_vocab_size, raw_constraint_list = _get_vocab_slice_ids(restrict_lexicon,
                                                                                           source_words,
                                                                                           raw_constraint_list,
                                                                                           self.eos_id,
                                                                                           beam_size=self.beam_size)

        pad_dist = np.full((batch_size * self.beam_size, output_vocab_size - 1),
                           fill_value=np.inf, ctx=self.context, dtype=self.dtype)
        eos_dist = np.full((batch_size * self.beam_size, output_vocab_size),
                           fill_value=np.inf, ctx=self.context, dtype=self.dtype)
        eos_dist[:, C.EOS_ID] = 0
        unk_dist = None
        if self.prevent_unk:
            unk_dist = np.zeros_like(eos_dist)
            unk_dist[:, C.UNK_ID] = np.inf  # pylint: disable=E1137

        # Initialize the beam to track constraint sets, where target-side lexical constraints are present
        constraints = constrained.init_batch(raw_constraint_list, self.beam_size, self.bos_id, self.eos_id)

        if self.global_avoid_trie or any(raw_avoid_list):
            avoid_states = constrained.AvoidBatch(batch_size, self.beam_size,
                                                  avoid_list=raw_avoid_list,
                                                  global_avoid_trie=self.global_avoid_trie)
            avoid_states.consume(best_word_indices[:, 0])  # constraints operate only on primary target factor

        # (0) encode source sentence, returns a list
        model_states, estimated_reference_lengths = self._inference.encode_and_initialize(source, source_length)
        # repeat states to beam_size
        model_states = _repeat_states(model_states, self.beam_size, self._inference.state_structure())
        # repeat estimated_reference_lengths to shape (batch_size * beam_size, 1)
        estimated_reference_lengths = np.repeat(estimated_reference_lengths, self.beam_size, axis=0)

        # Records items in the beam that are inactive. At the beginning (t==1), there is only one valid or active
        # item on the beam for each sentence
        inactive = np.zeros((batch_size * self.beam_size, 1), dtype='int32', ctx=self.context)
        t = 1
        for t in range(1, max_iterations + 1):  # max_iterations + 1 required to get correct results
            # (1) obtain next predictions and advance models' state
            # target_dists: (batch_size * beam_size, target_vocab_size)
            target_dists, model_states, target_factors = self._inference.decode_step(best_word_indices,
                                                                                     model_states,
                                                                                     vocab_slice_ids)

            # (2) Produces the accumulated cost of target words in each row.
            # There is special treatment for finished and inactive rows: inactive rows are inf everywhere;
            # finished rows are inf everywhere except column zero, which holds the accumulated model score
            scores, lengths = self._update_scores(target_dists,
                                                  finished,
                                                  inactive,
                                                  scores_accumulated,
                                                  lengths,
                                                  max_output_lengths,
                                                  unk_dist,
                                                  pad_dist,
                                                  eos_dist)

            # Mark entries that should be blocked as having a score of np.inf
            if self.global_avoid_trie or any(raw_avoid_list):
                block_indices = avoid_states.avoid()
                if len(block_indices) > 0:
                    scores[block_indices] = np.inf
                    if self._sample is not None:
                        target_dists[block_indices] = np.inf

            # (3) Get beam_size winning hypotheses for each sentence block separately. Only look as
            # far as the active beam size for each sentence.
            if self._sample is not None:
                best_hyp_indices, best_word_indices, scores_accumulated = self._sample(scores,
                                                                                       target_dists,
                                                                                       finished,
                                                                                       sample_best_hyp_indices)
            else:
                # On the first timestep, all hypotheses have identical histories, so force topk() to choose extensions
                # of the first row only by setting all other rows to inf
                if t == 1:
                    scores += first_step_mask

                best_hyp_indices, best_word_indices, scores_accumulated = self._top(scores, offset)

            # Constraints for constrained decoding are processed sentence by sentence
            if any(raw_constraint_list):
                best_hyp_indices, best_word_indices, scores_accumulated, constraints, inactive = constrained.topk(
                    t,
                    batch_size,
                    self.beam_size,
                    inactive,
                    scores,
                    constraints,
                    best_hyp_indices,
                    best_word_indices,
                    scores_accumulated)

            # Map from restricted to full vocab ids if needed
            if restrict_lexicon:
                best_word_indices = np.take(vocab_slice_ids, best_word_indices, axis=0)

            # (4) Normalize the scores of newly finished hypotheses. Note that after this until the
            # next call to topk(), hypotheses may not be in sorted order.
            _sort_inputs = [best_hyp_indices, best_word_indices, finished, scores_accumulated, lengths,
                            estimated_reference_lengths]
            if target_factors is not None:
                _sort_inputs.append(target_factors)
            best_word_indices, finished, scores_accumulated, lengths, estimated_reference_lengths = \
                self._sort_norm_and_update_finished(*_sort_inputs)

            # Collect best hypotheses, best word indices
            best_word_indices_list.append(best_word_indices)
            best_hyp_indices_list.append(best_hyp_indices)

            if self._should_stop(finished, batch_size):
                break

            # (5) update models' state with winning hypotheses (ascending)
            model_states = self._sort_states(best_hyp_indices, *model_states)

        logger.debug("Finished after %d out of %d steps.", t, max_iterations)

        # (6) Sort the hypotheses within each sentence (normalization for finished hyps may have unsorted them).
        scores_accumulated_shape = scores_accumulated.shape
        folded_accumulated_scores = scores_accumulated.reshape((batch_size, -1))
        indices = np.argsort(folded_accumulated_scores.astype('float32', copy=False), axis=1).reshape((-1,))
        best_hyp_indices = np.unravel_index(indices, scores_accumulated_shape)[0].astype('int32') + offset
        scores_accumulated = scores_accumulated.take(best_hyp_indices, axis=0)
        best_hyp_indices_list.append(best_hyp_indices)
        lengths = lengths.take(best_hyp_indices, axis=0)
        all_best_hyp_indices = np.stack(best_hyp_indices_list, axis=1)
        all_best_word_indices = np.stack(best_word_indices_list, axis=2)
        constraints = [constraints[x] for x in best_hyp_indices.tolist()]

        return (all_best_hyp_indices,
                all_best_word_indices,
                scores_accumulated,
                lengths.astype('int32', copy=False),
                estimated_reference_lengths,
                constraints)
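
The `offset` array above is what lets a per-sentence topk (taken over a (batch, beam * vocab) view) index back into the flat (batch * beam, ...) layout; a minimal sketch of that arithmetic with made-up sizes:

import numpy as onp

batch_size, beam_size = 2, 3
offset = onp.repeat(onp.arange(0, batch_size * beam_size, beam_size), beam_size)
print(offset)  # [0 0 0 3 3 3]: one block of rows per sentence

# Say topk returns hypothesis indices local to each sentence's beam;
# adding the offset maps them to rows of the flat layout:
local = onp.array([0, 1, 2, 2, 0, 1])
print(local + offset)  # [0 1 2 5 3 4]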
Example #9
    def forward(self,
                source: np.ndarray,
                source_length: np.ndarray,
                restrict_lexicon: Optional[lexicon.TopKLexicon],
                raw_constraint_list: List[Optional[constrained.RawConstraintList]],
                raw_avoid_list: List[Optional[constrained.RawConstraintList]],
                max_output_lengths: np.ndarray) -> Tuple[np.ndarray,
                                                         np.ndarray,
                                                         np.ndarray,
                                                         np.ndarray,
                                                         List[Optional[np.ndarray]],
                                                         List[Optional[constrained.ConstrainedHypothesis]]]:
        """
        Translates a single sentence (batch_size=1) using greedy search.

        :param source: Source ids. Shape: (batch_size=1, bucket_key, num_factors).
        :param source_length: Valid source lengths. Shape: (batch_size=1,).
        :param restrict_lexicon: Lexicon to use for vocabulary restriction.
        :param raw_constraint_list: A list of optional lists containing phrases (as lists of target word IDs)
                that must appear in each output.
        :param raw_avoid_list: A list of optional lists containing phrases (as lists of target word IDs)
                that must NOT appear in each output.
        :param max_output_lengths: ndarray of maximum output lengths per input in source.
                Shape: (batch_size=1,). Dtype: int32.
        :return List of best hypotheses indices, list of best word indices,
                array of accumulated length-normalized negative log-probs, hypotheses lengths,
                predicted lengths of references (if any), constraints (if any).
        """
        batch_size = source.shape[0]
        assert batch_size == 1, "Greedy Search does not support batch_size != 1"

        # Maximum greedy search iterations (determined by longest input with eos)
        max_iterations = max_output_lengths.max().item()
        logger.debug("max greedy search iterations: %d", max_iterations)

        # best word indices (also act as input), shape: (batch_size, num_target_factors)
        best_word_index = np.full((batch_size, self.num_target_factors),
                                  fill_value=self.bos_id, ctx=self.context, dtype='int32')
        outputs = []  # type: List[np.ndarray]

        vocab_slice_ids = None  # type: Optional[np.ndarray]
        # If using a top-k lexicon, select param rows for logit computation that correspond to the
        # target vocab for this sentence.
        if restrict_lexicon:
            source_words = np.squeeze(np.split(source, self.num_source_factors, axis=2)[0], axis=2)
            vocab_slice_ids, _, raw_constraint_list = _get_vocab_slice_ids(restrict_lexicon, source_words,
                                                                           raw_constraint_list,
                                                                           self.eos_id, beam_size=1)

        # (0) encode source sentence, returns a list
        model_states, _ = self._inference.encode_and_initialize(source, source_length)
        # TODO: check for disabled predicted output length

        t = 1
        for t in range(1, max_iterations + 1):
            scores, model_states, target_factors = self._inference.decode_step(best_word_index,
                                                                               model_states,
                                                                               vocab_slice_ids=vocab_slice_ids)
            # shape: (batch*beam=1, 1)
            best_word_index = self.work_block(scores, vocab_slice_ids, target_factors)
            outputs.append(best_word_index)

            if best_word_index == self.eos_id or best_word_index == C.PAD_ID:
                break

        logger.debug("Finished after %d out of %d steps.", t, max_iterations)

        # shape: (1, num_factors, length)
        stacked_outputs = np.stack(outputs, axis=2)
        length = np.array([t], dtype='int32')  # shape (1,)
        hyp_indices = np.zeros((1, t + 1), dtype='int32')
        score = np.array([-1.])  # TODO: return unnormalized proper score

        return hyp_indices, stacked_outputs, score, length, None, []  # type: ignore
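
Stripped of model state and factors, the loop above is just pick-best, feed back, stop at EOS; a toy sketch with a stand-in step function (scores here are negative log-probs, hence argmin, and all names are made up):

import numpy as onp

EOS_ID, BOS_ID, vocab, max_iterations = 3, 2, 10, 5
rng = onp.random.default_rng(0)

def fake_decode_step(prev_word):  # stand-in for self._inference.decode_step
    return rng.uniform(0, 1, vocab)

best, outputs = BOS_ID, []
for t in range(1, max_iterations + 1):
    best = int(fake_decode_step(best).argmin())
    outputs.append(best)
    if best == EOS_ID:
        break
print(outputs)  # ends early only if EOS_ID wins a step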
Example #10
def test_get_training_data_iters():
    pytest.importorskip('mxnet')
    from sockeye import data_io
    from mxnet import np
    from sockeye.test_utils import tmp_digits_dataset
    train_line_count = 100
    train_line_count_empty = 0
    train_max_length = 30
    dev_line_count = 20
    dev_max_length = 30
    expected_mean = 1.0
    expected_std = 0.0
    test_line_count = 20
    test_line_count_empty = 0
    test_max_length = 30
    batch_size = 5
    num_source_factors = num_target_factors = 1
    with tmp_digits_dataset("tmp_corpus",
                            train_line_count, train_line_count_empty, train_max_length - C.SPACE_FOR_XOS,
                            dev_line_count, dev_max_length - C.SPACE_FOR_XOS,
                            test_line_count, test_line_count_empty,
                            test_max_length - C.SPACE_FOR_XOS) as data:
        # tmp common vocab
        vcb = vocab.build_from_paths([data['train_source'], data['train_target']])

        train_iter, val_iter, config_data, data_info = data_io.get_training_data_iters(
            sources=[data['train_source']],
            targets=[data['train_target']],
            validation_sources=[data['dev_source']],
            validation_targets=[data['dev_target']],
            source_vocabs=[vcb],
            target_vocabs=[vcb],
            source_vocab_paths=[None],
            target_vocab_paths=[None],
            shared_vocab=True,
            batch_size=batch_size,
            batch_type=C.BATCH_TYPE_SENTENCE,
            batch_num_devices=1,
            max_seq_len_source=train_max_length,
            max_seq_len_target=train_max_length,
            bucketing=True,
            bucket_width=10)
        assert isinstance(train_iter, data_io.ParallelSampleIter)
        assert isinstance(val_iter, data_io.ParallelSampleIter)
        assert isinstance(config_data, data_io.DataConfig)
        assert data_info.sources == [data['train_source']]
        assert data_info.targets == [data['train_target']]
        assert data_info.source_vocabs == [None]
        assert data_info.target_vocabs == [None]
        assert config_data.data_statistics.max_observed_len_source == train_max_length
        assert config_data.data_statistics.max_observed_len_target == train_max_length
        assert np.isclose(config_data.data_statistics.length_ratio_mean, expected_mean)
        assert np.isclose(config_data.data_statistics.length_ratio_std, expected_std)

        assert train_iter.batch_size == batch_size
        assert val_iter.batch_size == batch_size
        assert train_iter.default_bucket_key == (train_max_length, train_max_length)
        assert val_iter.default_bucket_key == (dev_max_length, dev_max_length)
        assert train_iter.dtype == 'float32'

        # test some batches
        bos_id = vcb[C.BOS_SYMBOL]
        eos_id = vcb[C.EOS_SYMBOL]
        expected_first_target_symbols = np.full((batch_size, 1), bos_id, dtype='float32')
        for epoch in range(2):
            while train_iter.iter_next():
                batch = train_iter.next()
                assert isinstance(batch, data_io.Batch)
                source = batch.source
                target = batch.target
                label = batch.labels[C.TARGET_LABEL_NAME]  # TODO: still 2-shape: (batch, length)
                length_ratio_label = batch.labels[C.LENRATIO_LABEL_NAME]
                assert source.shape[0] == target.shape[0] == label.shape[0] == batch_size
                assert source.shape[2] == target.shape[2] == num_source_factors == num_target_factors
                # each source sequence contains one EOS symbol
                assert np.sum(source == eos_id) == batch_size
                # target first symbol should be BOS
                assert np.array_equal(target[:, 0], expected_first_target_symbols)
                # label first symbol should be 2nd target symbol
                assert np.array_equal(label[:, 0], target[:, 1, 0])
                # each label sequence contains one EOS symbol
                assert np.sum(label == eos_id) == batch_size
            train_iter.reset()