Пример #1
0
    def test_glue(self):
        """Exercise `prep.glue` on a single QQP-style example.

        Three expectations are checked in order:
          1. Without `feature_names`, the expected `inputs` string is the
             benchmark name followed by every `q*` feature as `name: value`.
          2. With `feature_names=['q3', 'q1']`, only those features are
             expected, in the order given.
          3. With a label of -1, the expected target is '<unk>'.

        In every case `idx` is expected to come through unchanged.
        """
        example_idx = 10
        benchmark = 'qqp'
        classes = ['not_duplicate', 'duplicate']
        all_features_inputs = 'qqp q1: How so? q2: Why not? q3: Who?'

        def expected(inputs, targets):
            # Every expectation shares the same three keys and the same idx.
            return {'inputs': inputs, 'targets': targets, 'idx': example_idx}

        labeled = {
            'q1': 'How so?', 'q2': 'Why not?', 'q3': 'Who?',
            'idx': example_idx, 'label': 0,
        }
        labeled_ds = tf.data.Dataset.from_tensors(labeled)

        # Case 1: default feature selection.
        assert_dataset(
            prep.glue(labeled_ds, benchmark, classes),
            expected(all_features_inputs, 'not_duplicate'),
        )

        # Case 2: explicit `feature_names` picks and orders the features.
        reordered = prep.glue(
            labeled_ds, benchmark, classes, feature_names=['q3', 'q1'])
        assert_dataset(
            reordered,
            expected('qqp q3: Who? q1: How so?', 'not_duplicate'),
        )

        # Case 3: a label of -1 should produce the '<unk>' target.
        unlabeled = dict(labeled, label=-1)
        unlabeled_ds = tf.data.Dataset.from_tensors(unlabeled)
        assert_dataset(
            prep.glue(unlabeled_ds, benchmark, classes),
            expected(all_features_inputs, '<unk>'),
        )
Пример #2
0
    def test_multirc(self):
        """Exercise `prep.glue` on a MultiRC-style example with a nested `idx`.

        The expected output has the features in the order given by
        `feature_names`, the paragraph text without the `<b>`/`<br>` markup
        present in the input, the target 'True' for label 1, and the nested
        `idx` dict flattened into `idx/<key>` entries.
        """
        paragraph_id, question_id, answer_id = 5, 1, 3

        # Adjacent literals are concatenated at compile time, so the values
        # are the same single strings; they are only split for readability.
        raw_paragraph = (
            '<b>Sent 1: </b>Once upon a time, there was a squirrel named Joey.'
            '<br><b>Sent 2: </b>Joey loved to go outside and play with his cousin Jimmy.'
        )
        question = 'Why was Joey surprised the morning he woke up for breakfast?'
        example = {
            'paragraph': raw_paragraph,
            'question': question,
            'answer': 'There was only pie to eat',
            'label': 1,
            'idx': {
                'paragraph': paragraph_id,
                'question': question_id,
                'answer': answer_id,
            },
        }
        source_ds = tf.data.Dataset.from_tensors(example)

        result = prep.glue(
            source_ds,
            'multirc',
            label_names=['False', 'True'],
            feature_names=('question', 'answer', 'paragraph'),
        )

        expected_inputs = (
            'multirc question: Why was Joey surprised the morning he woke up for breakfast?'
            ' answer: There was only pie to eat'
            ' paragraph: Sent 1: Once upon a time, there was a squirrel named Joey.'
            ' Sent 2: Joey loved to go outside and play with his cousin Jimmy.'
        )
        expected = {
            'inputs': expected_inputs,
            'targets': 'True',
            'idx/paragraph': paragraph_id,
            'idx/question': question_id,
            'idx/answer': answer_id,
        }
        assert_dataset(result, expected)