示例#1
0
def test_seq_sort(name, train_params, translate_params, use_prepared_data,
                  n_source_factors, n_target_factors, perplexity_thresh, bleu_thresh):
    """
    Task: sort short sequences of digits.

    Generates a temporary digits dataset with sorted targets, runs
    ``check_train_translate`` on it and then asserts that the best validation
    perplexity is at most ``perplexity_thresh`` and that BLEU on both the
    regular and the restricted test outputs is at least ``bleu_thresh``.

    :param name: Label of the test case; only used in the log output.
    :param train_params: Forwarded to ``check_train_translate``.
    :param translate_params: Forwarded to ``check_train_translate``.
    :param use_prepared_data: Forwarded to ``check_train_translate``.
    :param n_source_factors: Passed to ``tmp_digits_dataset`` as ``with_n_source_factors``.
    :param n_target_factors: Passed to ``tmp_digits_dataset`` as ``with_n_target_factors``.
    :param perplexity_thresh: Upper bound for the best validation perplexity.
    :param bleu_thresh: Lower bound for both BLEU scores.
    """
    with tmp_digits_dataset("test_seq_sort.",
                            _TRAIN_LINE_COUNT, _TRAIN_LINE_COUNT_EMPTY, _LINE_MAX_LENGTH,
                            _DEV_LINE_COUNT, _LINE_MAX_LENGTH,
                            _TEST_LINE_COUNT, _TEST_LINE_COUNT_EMPTY, _TEST_MAX_LENGTH,
                            sort_target=True,
                            seed_train=_SEED_TRAIN_DATA,
                            seed_dev=_SEED_DEV_DATA,
                            with_n_source_factors=n_source_factors,
                            with_n_target_factors=n_target_factors) as data:
        # NOTE(review): `seed` is not bound inside this function; presumably it is a
        # module-level name -- confirm it exists, otherwise this raises NameError.
        results = check_train_translate(train_params=train_params,
                                        translate_params=translate_params,
                                        data=data,
                                        use_prepared_data=use_prepared_data,
                                        max_seq_len=_LINE_MAX_LENGTH,
                                        compare_output=True,
                                        seed=seed)

        # Best (lowest) validation perplexity over all entries of the metrics file
        metrics_path = os.path.join(results['model'], C.METRICS_NAME)
        checkpoint_metrics = sockeye.utils.read_metrics_file(metrics_path)
        perplexity = min(entry[C.PERPLEXITY + '-val'] for entry in checkpoint_metrics)

        # Collect the translations, then score them against the test targets
        hypotheses = [output['translation'] for output in results['test_outputs']]
        hypotheses_restricted = [output['translation']
                                 for output in results['test_outputs_restricted']]
        references = results['test_targets']
        bleu = sockeye.evaluate.raw_corpus_bleu(hypotheses=hypotheses, references=references)
        chrf = sockeye.evaluate.raw_corpus_chrf(hypotheses=hypotheses, references=references)
        bleu_restrict = sockeye.evaluate.raw_corpus_bleu(hypotheses=hypotheses_restricted,
                                                         references=references)

        logger.info("test: %s", name)
        logger.info("perplexity=%f, bleu=%f, bleu_restrict=%f chrf=%f", perplexity, bleu, bleu_restrict, chrf)

        assert perplexity <= perplexity_thresh
        assert bleu >= bleu_thresh
        assert bleu_restrict >= bleu_thresh
示例#2
0
def test_other_clis(train_params: str, translate_params: str):
    """
    Task: test CLIs and core features other than train & translate.

    A minimal model is trained and used for translation on a temporary digits
    dataset via ``run_train_translate``; the resulting model and outputs are
    then handed to the checkpoint-decoder, MC-dropout, parameter-averaging and
    evaluate-CLI helper checks.

    :param train_params: Forwarded to ``run_train_translate``.
    :param translate_params: Forwarded to ``run_train_translate``.
    """
    dataset_kwargs = dict(prefix="test_other_clis",
                          train_line_count=_TRAIN_LINE_COUNT,
                          train_line_count_empty=_TRAIN_LINE_COUNT_EMPTY,
                          train_max_length=_LINE_MAX_LENGTH,
                          dev_line_count=_DEV_LINE_COUNT,
                          dev_max_length=_LINE_MAX_LENGTH,
                          test_line_count=_TEST_LINE_COUNT,
                          test_line_count_empty=0,
                          test_max_length=_TEST_MAX_LENGTH)

    with tmp_digits_dataset(**dataset_kwargs) as data:
        # Train a minimal default model and translate the test set with it
        trained = run_train_translate(train_params=train_params,
                                      translate_params=translate_params,
                                      data=data,
                                      max_seq_len=_LINE_MAX_LENGTH,
                                      use_pytorch=True)

        dev_source, dev_target, model_dir = (trained[key]
                                             for key in ('dev_source', 'dev_target', 'model'))

        _test_checkpoint_decoder(dev_source, dev_target, model_dir)
        _test_mc_dropout(model_dir)
        _test_parameter_averaging(model_dir)
        _test_evaluate_cli(trained['test_outputs'], trained['test_target'])
示例#3
0
def test_seq_copy(use_pytorch: bool, train_params: str, translate_params: str,
                  use_prepared_data: bool, n_source_factors: int,
                  n_target_factors: int):
    """
    Task: copy short sequences of digits.

    Creates a temporary digits dataset with unsorted targets and runs
    ``check_train_translate`` on it with output comparison disabled.

    :param use_pytorch: Forwarded to ``check_train_translate``.
    :param train_params: Forwarded to ``check_train_translate``.
    :param translate_params: Forwarded to ``check_train_translate``.
    :param use_prepared_data: Forwarded to ``check_train_translate``.
    :param n_source_factors: Passed to ``tmp_digits_dataset`` as ``with_n_source_factors``.
    :param n_target_factors: Passed to ``tmp_digits_dataset`` as ``with_n_target_factors``.
    """
    dataset_kwargs = dict(prefix="test_seq_copy",
                          train_line_count=_TRAIN_LINE_COUNT,
                          train_line_count_empty=_TRAIN_LINE_COUNT_EMPTY,
                          train_max_length=_LINE_MAX_LENGTH,
                          dev_line_count=_DEV_LINE_COUNT,
                          dev_max_length=_LINE_MAX_LENGTH,
                          test_line_count=_TEST_LINE_COUNT,
                          test_line_count_empty=_TEST_LINE_COUNT_EMPTY,
                          test_max_length=_TEST_MAX_LENGTH,
                          sort_target=False,
                          with_n_source_factors=n_source_factors,
                          with_n_target_factors=n_target_factors)

    with tmp_digits_dataset(**dataset_kwargs) as data:
        # TODO: Comparing translation and scoring scores is temporarily switched off
        # (compare_output=False) because it sometimes produces inconsistent results
        # for --batch-size > 1 (see issue #639 on github).
        check_train_translate(train_params=train_params,
                              translate_params=translate_params,
                              data=data,
                              use_prepared_data=use_prepared_data,
                              max_seq_len=_LINE_MAX_LENGTH,
                              compare_output=False,
                              use_pytorch=use_pytorch)
示例#4
0
def test_mx_pt_eq_training_data():
    """
    Check that both data I/O implementations yield identical training batches.

    Training and validation iterators are created from the same raw digits
    corpus with ``data_io`` (key ``'mx'``) and ``data_io_pt`` (key ``'pt'``),
    without permutation, and compared batch by batch. The test is skipped
    when ``mxnet`` cannot be imported.

    The iterators are walked with ``zip_longest`` instead of ``zip``: plain
    ``zip`` stops at the shorter iterator, so a missing or extra batch in one
    implementation would go unnoticed.
    """
    pytest.importorskip("mxnet")
    from itertools import zip_longest
    from sockeye import data_io

    train_line_count = 100
    train_line_count_empty = 0
    train_max_length = 30
    dev_line_count = 20
    dev_max_length = 30
    test_line_count = 20
    test_line_count_empty = 0
    test_max_length = 30
    batch_size = 5
    with tmp_digits_dataset("tmp_corpus", train_line_count,
                            train_line_count_empty,
                            train_max_length - C.SPACE_FOR_XOS, dev_line_count,
                            dev_max_length - C.SPACE_FOR_XOS, test_line_count,
                            test_line_count_empty,
                            test_max_length - C.SPACE_FOR_XOS) as data:

        # One vocabulary built from both sides of the training data
        vcb = vocab.build_from_paths(
            [data['train_source'], data['train_target']])

        train_iters = {}
        val_iters = {}

        # For each implementation
        for key, data_io_module in (('mx', data_io), ('pt', data_io_pt)):
            # Create iterators with no data permutation (preserve order for
            # batch equality checks)
            train_iter, val_iter, _, _ = data_io_module.get_training_data_iters(
                sources=[data['train_source']],
                targets=[data['train_target']],
                validation_sources=[data['dev_source']],
                validation_targets=[data['dev_target']],
                source_vocabs=[vcb],
                target_vocabs=[vcb],
                source_vocab_paths=[None],
                target_vocab_paths=[None],
                shared_vocab=True,
                batch_size=batch_size,
                batch_type=C.BATCH_TYPE_SENTENCE,
                max_seq_len_source=train_max_length,
                max_seq_len_target=train_max_length,
                bucketing=True,
                bucket_width=10,
                permute=False)
            train_iters[key] = train_iter
            val_iters[key] = val_iter

        # Check equality of all MXNet/PyTorch batches. The sentinel marks the
        # positions where one iterator ran out before the other.
        exhausted = object()
        for iters in (train_iters, val_iters):
            for mx_batch, pt_batch in zip_longest(iters['mx'], iters['pt'],
                                                  fillvalue=exhausted):
                assert mx_batch is not exhausted and pt_batch is not exhausted, \
                    "MXNet and PyTorch iterators yielded different numbers of batches"
                _assert_mx_pt_batches_equal(mx_batch, pt_batch)
示例#5
0
def test_seq_copy(name, train_params, translate_params, use_prepared_data,
                  perplexity_thresh, bleu_thresh):
    """
    Task: copy short sequences of digits.

    Generates a temporary digits dataset with unsorted targets, runs
    ``check_train_translate`` on it and asserts that the best validation
    perplexity is at most ``perplexity_thresh`` and BLEU is at least
    ``bleu_thresh``. BLEU on restricted outputs is computed and checked only
    when ``'test_outputs_restricted'`` is present in the returned data.

    :param name: Label of the test case; only used in the log output.
    :param train_params: Forwarded to ``check_train_translate``.
    :param translate_params: Forwarded to ``check_train_translate``.
    :param use_prepared_data: Forwarded to ``check_train_translate``.
    :param perplexity_thresh: Upper bound for the best validation perplexity.
    :param bleu_thresh: Lower bound for the BLEU score(s).
    """
    with tmp_digits_dataset(prefix="test_seq_copy",
                            train_line_count=_TRAIN_LINE_COUNT,
                            train_line_count_empty=_TRAIN_LINE_COUNT_EMPTY,
                            train_max_length=_LINE_MAX_LENGTH,
                            dev_line_count=_DEV_LINE_COUNT,
                            dev_max_length=_LINE_MAX_LENGTH,
                            test_line_count=_TEST_LINE_COUNT,
                            test_line_count_empty=_TEST_LINE_COUNT_EMPTY,
                            test_max_length=_TEST_MAX_LENGTH,
                            sort_target=False,
                            with_n_source_factors=0) as data:
        # NOTE(review): `seed` is not bound inside this function; presumably it is a
        # module-level name -- confirm it exists, otherwise this raises NameError.
        data = check_train_translate(train_params=train_params,
                                     translate_params=translate_params,
                                     data=data,
                                     use_prepared_data=use_prepared_data,
                                     max_seq_len=_LINE_MAX_LENGTH,
                                     compare_output=True,
                                     seed=seed)

        # get best validation perplexity
        metrics = sockeye.utils.read_metrics_file(
            os.path.join(data['model'], C.METRICS_NAME))
        perplexity = min(m[C.PERPLEXITY + '-val'] for m in metrics)

        # compute metrics
        hypotheses = [output['translation'] for output in data['test_outputs']]
        bleu = sockeye.evaluate.raw_corpus_bleu(
            hypotheses=hypotheses, references=data['test_targets'])
        chrf = sockeye.evaluate.raw_corpus_chrf(
            hypotheses=hypotheses, references=data['test_targets'])
        if 'test_outputs_restricted' in data:
            hypotheses_restricted = [
                output['translation'] for output in data['test_outputs_restricted']
            ]
            bleu_restrict = sockeye.evaluate.raw_corpus_bleu(
                hypotheses=hypotheses_restricted,
                references=data['test_targets'])
        else:
            bleu_restrict = None

        # bleu_restrict may be None, which a "%f" placeholder cannot format (the
        # logging module would swallow the TypeError and drop the message).
        # Pre-format it so the line is identical when a score exists and still
        # gets logged when it does not.
        bleu_restrict_repr = "n/a" if bleu_restrict is None else "%f" % bleu_restrict

        logger.info("================")
        logger.info("test results: %s", name)
        logger.info("perplexity=%f, bleu=%f, bleu_restrict=%s chrf=%f",
                    perplexity, bleu, bleu_restrict_repr, chrf)
        logger.info("================\n")

        assert perplexity <= perplexity_thresh
        assert bleu >= bleu_thresh
        if bleu_restrict is not None:
            assert bleu_restrict >= bleu_thresh
示例#6
0
def test_get_training_data_iters():
    """
    Check the iterators and metadata returned by ``data_io.get_training_data_iters``.

    A temporary digits corpus is generated, a vocabulary is built from its
    training files, and the returned iterators, data config and data info are
    validated. Afterwards two full passes over the training iterator verify
    batch sizes and the BOS/EOS layout of source, target and label arrays.
    """
    # Corpus dimensions; the maximum lengths are reduced by C.SPACE_FOR_XOS
    # when the dataset is generated below.
    train_line_count, train_line_count_empty, train_max_length = 100, 0, 30
    dev_line_count, dev_max_length = 20, 30
    test_line_count, test_line_count_empty, test_max_length = 20, 0, 30
    expected_mean, expected_std = 1.0, 0.0
    batch_size = 5

    with tmp_digits_dataset("tmp_corpus",
                            train_line_count, train_line_count_empty,
                            train_max_length - C.SPACE_FOR_XOS,
                            dev_line_count, dev_max_length - C.SPACE_FOR_XOS,
                            test_line_count, test_line_count_empty,
                            test_max_length - C.SPACE_FOR_XOS) as data:
        # Temporary vocabulary built from both training files
        vcb = vocab.build_from_paths([data['train_source'], data['train_target']])

        train_iter, val_iter, config_data, data_info = data_io.get_training_data_iters(
            sources=[data['train_source']],
            target=data['train_target'],
            validation_sources=[data['dev_source']],
            validation_target=data['dev_target'],
            source_vocabs=[vcb],
            target_vocab=vcb,
            source_vocab_paths=[None],
            target_vocab_path=None,
            shared_vocab=True,
            batch_size=batch_size,
            batch_by_words=False,
            batch_num_devices=1,
            max_seq_len_source=train_max_length,
            max_seq_len_target=train_max_length,
            bucketing=True,
            bucket_width=10)

        # Types of the returned objects
        for data_iter in (train_iter, val_iter):
            assert isinstance(data_iter, data_io.ParallelSampleIter)
        assert isinstance(config_data, data_io.DataConfig)

        # Data info mirrors the paths that were passed in
        assert data_info.sources == [data['train_source']]
        assert data_info.target == data['train_target']
        assert data_info.source_vocabs == [None]
        assert data_info.target_vocab is None

        # Corpus statistics
        stats = config_data.data_statistics
        assert stats.max_observed_len_source == train_max_length
        assert stats.max_observed_len_target == train_max_length
        assert np.isclose(stats.length_ratio_mean, expected_mean)
        assert np.isclose(stats.length_ratio_std, expected_std)

        # Iterator configuration
        assert train_iter.batch_size == batch_size
        assert val_iter.batch_size == batch_size
        assert train_iter.default_bucket_key == (train_max_length, train_max_length)
        assert val_iter.default_bucket_key == (dev_max_length, dev_max_length)
        assert train_iter.dtype == 'float32'

        # Inspect every batch over two passes
        bos_id = vcb[C.BOS_SYMBOL]
        eos_id = vcb[C.EOS_SYMBOL]
        expected_first_target_symbols = np.full((batch_size, ), bos_id, dtype='float32')
        for _ in range(2):
            while train_iter.iter_next():
                batch = train_iter.next()
                assert isinstance(batch, data_io.Batch)
                source = batch.source.asnumpy()
                target = batch.target.asnumpy()
                label = batch.labels[C.TARGET_LABEL_NAME].asnumpy()
                # The value is not inspected, but the lookup itself fails if the
                # length-ratio label is missing from the batch.
                length_ratio_label = batch.labels[C.LENRATIO_LABEL_NAME].asnumpy()
                assert source.shape[0] == target.shape[0] == label.shape[0] == batch_size
                # each source sequence contains one EOS symbol
                assert np.sum(source == eos_id) == batch_size
                # target first symbol should be BOS
                assert np.array_equal(target[:, 0], expected_first_target_symbols)
                # label first symbol should be 2nd target symbol
                assert np.array_equal(label[:, 0], target[:, 1])
                # each label sequence contains one EOS symbol
                assert np.sum(label == eos_id) == batch_size
            train_iter.reset()
示例#7
0
def test_get_training_data_iters():
    """
    Check the iterators and metadata returned by ``data_io.get_training_data_iters``.

    A temporary digits corpus is generated, a vocabulary is built from its
    training files, and the returned iterators, data config and data info are
    validated. Afterwards two full passes over the training iterator verify
    batch sizes, the factor dimension and the BOS/EOS layout of source, target
    and label tensors.
    """
    from sockeye.test_utils import tmp_digits_dataset

    # Corpus dimensions; the maximum lengths are reduced by C.SPACE_FOR_XOS
    # when the dataset is generated below.
    train_line_count, train_line_count_empty, train_max_length = 100, 0, 30
    dev_line_count, dev_max_length = 20, 30
    test_line_count, test_line_count_empty, test_max_length = 20, 0, 30
    expected_mean, expected_std = 1.0, 0.0
    batch_size = 5
    num_source_factors = num_target_factors = 1

    with tmp_digits_dataset("tmp_corpus",
                            train_line_count, train_line_count_empty,
                            train_max_length - C.SPACE_FOR_XOS,
                            dev_line_count, dev_max_length - C.SPACE_FOR_XOS,
                            test_line_count, test_line_count_empty,
                            test_max_length - C.SPACE_FOR_XOS) as data:
        # Temporary vocabulary built from both training files
        vcb = vocab.build_from_paths([data['train_source'], data['train_target']])

        train_iter, val_iter, config_data, data_info = data_io.get_training_data_iters(
            sources=[data['train_source']],
            targets=[data['train_target']],
            validation_sources=[data['dev_source']],
            validation_targets=[data['dev_target']],
            source_vocabs=[vcb],
            target_vocabs=[vcb],
            source_vocab_paths=[None],
            target_vocab_paths=[None],
            shared_vocab=True,
            batch_size=batch_size,
            batch_type=C.BATCH_TYPE_SENTENCE,
            max_seq_len_source=train_max_length,
            max_seq_len_target=train_max_length,
            bucketing=True,
            bucket_width=10)

        # Types of the returned objects
        for data_iter in (train_iter, val_iter):
            assert isinstance(data_iter, data_io.ParallelSampleIter)
        assert isinstance(config_data, data_io.DataConfig)

        # Data info mirrors the paths that were passed in
        assert data_info.sources == [data['train_source']]
        assert data_info.targets == [data['train_target']]
        assert data_info.source_vocabs == [None]
        assert data_info.target_vocabs == [None]

        # Corpus statistics
        stats = config_data.data_statistics
        assert stats.max_observed_len_source == train_max_length
        assert stats.max_observed_len_target == train_max_length
        assert np.isclose(stats.length_ratio_mean, expected_mean)
        assert np.isclose(stats.length_ratio_std, expected_std)

        # Iterator configuration
        assert train_iter.batch_size == batch_size
        assert val_iter.batch_size == batch_size

        # Inspect every batch over two passes
        bos_id = vcb[C.BOS_SYMBOL]
        eos_id = vcb[C.EOS_SYMBOL]
        expected_first_target_symbols = torch.full((batch_size, 1), bos_id, dtype=torch.int32)
        for _ in range(2):
            while train_iter.iter_next():
                batch = train_iter.next()
                assert isinstance(batch, data_io.Batch)
                source = batch.source
                target = batch.target
                label = batch.labels[C.TARGET_LABEL_NAME]  # TODO: still 2-shape: (batch, length)
                # The value is not inspected, but the lookup itself fails if the
                # length-ratio label is missing from the batch.
                length_ratio_label = batch.labels[C.LENRATIO_LABEL_NAME]
                assert source.shape[0] == target.shape[0] == label.shape[0] == batch_size
                # third dimension of source and target must match the factor counts
                assert source.shape[2] == target.shape[2] == num_source_factors == num_target_factors
                # each source sequence contains one EOS symbol
                assert torch.sum(source == eos_id) == batch_size
                # target first symbol should be BOS
                assert torch.equal(target[:, 0], expected_first_target_symbols)
                # label first symbol should be 2nd target symbol
                assert torch.equal(label[:, 0], target[:, 1, 0])
                # each label sequence contains one EOS symbol
                assert torch.sum(label == eos_id) == batch_size
            train_iter.reset()
示例#8
0
def test_mx_pt_eq_prepared_data():
    """
    Check that both data I/O implementations yield identical batches from prepared data.

    For ``data_io`` (key ``'mx'``) and ``data_io_pt`` (key ``'pt'``) the same
    raw digits corpus is sharded into a single shard, prepared into a separate
    output folder, and loaded through ``get_prepared_data_iters`` without
    permutation. The resulting training and validation iterators are compared
    batch by batch. The test is skipped when ``mxnet`` cannot be imported.

    The iterators are walked with ``zip_longest`` instead of ``zip``: plain
    ``zip`` stops at the shorter iterator, so a missing or extra batch in one
    implementation would go unnoticed.
    """
    pytest.importorskip("mxnet")
    from itertools import zip_longest
    from sockeye import data_io

    train_line_count = 100
    train_line_count_empty = 0
    train_max_length = 30
    dev_line_count = 20
    dev_max_length = 30
    test_line_count = 20
    test_line_count_empty = 0
    test_max_length = 30
    batch_size = 5
    batch_sentences_multiple_of = 8

    with tmp_digits_dataset("tmp_corpus", train_line_count,
                            train_line_count_empty,
                            train_max_length - C.SPACE_FOR_XOS, dev_line_count,
                            dev_max_length - C.SPACE_FOR_XOS, test_line_count,
                            test_line_count_empty,
                            test_max_length - C.SPACE_FOR_XOS) as data:

        with TemporaryDirectory() as work_dir, utils.create_pool(2) as pool:

            # One vocabulary built from both sides of the training data
            vcb = vocab.build_from_paths(
                [data['train_source'], data['train_target']])

            train_iters = {}
            val_iters = {}

            # For each implementation
            for key, data_io_module in (('mx', data_io), ('pt', data_io_pt)):
                # Each implementation writes into its own sub-folder
                output_folder = os.path.join(work_dir, key)
                os.mkdir(output_folder)

                # Create 1 shard (avoid random assignment that breaks equality)
                shards, keep_tmp_shard_files = data_io_module.create_shards(
                    source_fnames=[data['train_source']],
                    target_fnames=[data['train_target']],
                    num_shards=1,
                    output_prefix=output_folder)

                # Prepare data using multiple processes
                data_io_module.prepare_data(
                    source_fnames=[data['train_source']],
                    target_fnames=[data['train_target']],
                    source_vocabs=[vcb],
                    target_vocabs=[vcb],
                    source_vocab_paths=[None],
                    target_vocab_paths=[None],
                    shared_vocab=True,
                    max_seq_len_source=train_max_length,
                    max_seq_len_target=train_max_length,
                    bucketing=True,
                    bucket_width=10,
                    num_shards=1,
                    output_prefix=output_folder,
                    bucket_scaling=True,
                    keep_tmp_shard_files=keep_tmp_shard_files,
                    pool=pool,
                    shards=shards)

                # Create iterators
                train_iter, val_iter, _, _, _ = data_io_module.get_prepared_data_iters(
                    prepared_data_dir=output_folder,
                    validation_sources=[data['dev_source']],
                    validation_targets=[data['dev_target']],
                    shared_vocab=True,
                    batch_size=batch_size,
                    batch_type=C.BATCH_TYPE_SENTENCE,
                    batch_sentences_multiple_of=batch_sentences_multiple_of,
                    permute=False)

                train_iters[key] = train_iter
                val_iters[key] = val_iter

            # Check equality of all MXNet/PyTorch batches. The sentinel marks the
            # positions where one iterator ran out before the other.
            exhausted = object()
            for iters in (train_iters, val_iters):
                batch_pairs = zip_longest(iters['mx'], iters['pt'],
                                          fillvalue=exhausted)
                for i, (mx_batch, pt_batch) in enumerate(batch_pairs, 1):
                    # Print the 1-based batch index; presumably kept so the failing
                    # batch can be identified from the test output.
                    print(i)
                    assert mx_batch is not exhausted and pt_batch is not exhausted, \
                        "MXNet and PyTorch iterators yielded different numbers of batches"
                    _assert_mx_pt_batches_equal(mx_batch, pt_batch)