示例#1
0
 def test_no_text(self):
     """
     Expect display_data to raise AttributeError for a malformed jsonl file.

     The file written here holds one dialog whose first message has an "id"
     but no "text" field.
     """
     # NOTE(review): the options below use 'fromfile_datapath' and
     # 'display_verbose', while the other jsonfile tests pass
     # 'jsonfile_datapath' and 'verbose'. Confirm that the AttributeError is
     # really triggered by the missing text and not by the missing datapath.
     bad_line = '{"dialog": [[{"id": "speaker1"}, {"text": "Hello.", "id": "speaker2"}]]}\n'
     with testing_utils.tempdir() as workdir:
         datapath = os.path.join(workdir, "badfile.jsonl")
         with PathManager.open(datapath, "w") as handle:
             handle.write(bad_line)
         opt = {'task': 'jsonfile', 'fromfile_datapath': datapath, 'display_verbose': True}
         with self.assertRaises(AttributeError):
             testing_utils.display_data(opt)
示例#2
0
 def test_good_fileformat(self):
     """
     Verify that a correctly formatted jsonl file is displayed without error.
     """
     good_line = (
         '{"dialog": [[{"text": "Hi.", "id": "speaker1"}, {"text": "Hello.", "id": "speaker2"}]]}\n'
     )
     with testing_utils.tempdir() as workdir:
         datapath = os.path.join(workdir, "goodfile.jsonl")
         with PathManager.open(datapath, "w") as handle:
             handle.write(good_line)
         # The test passes as long as display_data does not raise.
         testing_utils.display_data(
             {'task': 'jsonfile', 'jsonfile_datapath': datapath, 'verbose': True}
         )
示例#3
0
 def test_secondspeaker_label(self):
     """
     Check text/label assignment when label_turns is 'secondspeaker'.

     Writes a single two-message dialog to a jsonl file, runs display_data
     on it, and expects the first displayed text to be speaker1's message
     and the first displayed label to be speaker2's message.
     """

     def _field_values(output, field):
         # For every output line mentioning `field`, keep what follows the
         # first ':' (or the whole line if it has no ':'), stripped.
         return [
             line.split(':', 1)[-1].strip()
             for line in output.split('\n')
             if field in line
         ]

     with testing_utils.tempdir() as tmpdir:
         fp = os.path.join(tmpdir, "goodfile.jsonl")
         with PathManager.open(fp, "w") as f:
             f.write(
                 '{"dialog": [[{"text": "Hi.", "id": "speaker1"}, {"text": "Hello.", "id": "speaker2"}]]}\n'
             )
         opt = {
             'task': 'jsonfile',
             'jsonfile_datapath': fp,
             'verbose': True,
             'label_turns': 'secondspeaker',
         }
         # Only the first of the three returned outputs is inspected.
         train_out, _, _ = testing_utils.display_data(opt)
         texts = _field_values(train_out, 'text')
         labels = _field_values(train_out, 'labels')
         # Fail with a readable message instead of an IndexError when the
         # expected lines are missing from the output.
         self.assertGreater(len(texts), 0, 'no text lines in train output')
         self.assertGreater(len(labels), 0, 'no labels lines in train output')
         self.assertEqual(texts[0], 'Hi.')
         self.assertEqual(labels[0], 'Hello.')
示例#4
0
        def _test_display_output(opt):
            """
            Run display_data and check the label lines of its first three outputs.

            Each output must contain at least one bracketed label match, and
            no match may appear twice within the same output.
            """
            output = testing_utils.display_data(opt)
            # Output 0 is searched for "labels"; outputs 1 and 2 for "eval_labels".
            label_patterns = (
                r"\[labels: .*\]",
                r"\[eval_labels: .*\]",
                r"\[eval_labels: .*\]",
            )
            matches_per_output = [
                re.findall(pattern, output[idx])
                for idx, pattern in enumerate(label_patterns)
            ]
            for idx, matches in enumerate(matches_per_output):
                self.assertGreater(len(matches), 0, 'DisplayData failed')
                # On duplicates, the full output is shown as the failure message.
                self.assertEqual(len(matches), len(set(matches)), output[idx])
    def test_output(self):
        """
        Check that display_data prints a complete example for a babi task.
        """
        opt = {'num_examples': 1, 'task': 'babi:task1k:1'}
        # Only the first of the three returned outputs is checked.
        first_output, _, _ = testing_utils.display_data(opt)

        self.assertGreater(len(first_output), 0, "Output is empty")
        expected_fragments = (
            ("[babi:task1k:1]:", "Babi task did not print"),
            ("~~", "Example output did not complete"),
        )
        for fragment, failure_msg in expected_fragments:
            self.assertIn(fragment, first_output, failure_msg)
示例#6
0
    def _run_display_output(self, opt: Dict[str, Union[str, bool]],
                            ep_ex_counts: List[Tuple[int, int]]):
        """
        Run display_data and verify the "loaded ... episodes" line of each output.

        :param opt: options forwarded to testing_utils.display_data.
        :param ep_ex_counts: expected (episodes, examples) pairs, matched
            positionally against the outputs returned by display_data.
        """
        output = testing_utils.display_data(opt)
        # The second-to-last '\n'-separated piece of each output is compared.
        summaries = [text.split('\n')[-2] for text in output]

        for idx, (expected_counts, summary) in enumerate(zip(ep_ex_counts, summaries)):
            episodes, examples = expected_counts
            expected = f'[ loaded {episodes} episodes with a total of {examples} examples ]'
            # The full output is attached as the failure message for debugging.
            self.assertEqual(summary, expected, output[idx])
示例#7
0
    def test_one_episode(self):
        """
        Check when the 'long episode' log message is emitted by display_data.

        A fromfile dataset of 1000 examples without episode_done markers is
        expected to produce a log record containing 'long episode'; the same
        data with episode_done:True on every example is expected not to.
        """
        with testing_utils.tempdir() as tmpdir:
            fp = os.path.join(tmpdir, "badfile.txt")
            with PathManager.open(fp, "w") as f:
                for _ in range(1000):
                    f.write('id:test_file\ttext:placeholder\tlabels:placeholder\n\n')
            opt = {'task': 'fromfile', 'fromfile_datapath': fp, 'verbose': True}
            with self.assertLogs(logger=logging.logger, level='DEBUG') as cm:
                testing_utils.display_data(opt)
                print("\n".join(cm.output))
                # Use a unittest assertion rather than a bare `assert`, which
                # is stripped when Python runs with -O.
                self.assertTrue(
                    any('long episode' in line for line in cm.output),
                    "expected a 'long episode' log message",
                )

            # Set up the well-formed file outside assertRaises so that only
            # the log check itself can satisfy the inverted expectation.
            fp = os.path.join(tmpdir, "goodfile.txt")
            with PathManager.open(fp, "w") as f:
                for _ in range(1000):
                    f.write(
                        'id:test_file\ttext:placeholder\tlabels:placeholder\tepisode_done:True\n\n'
                    )
            opt = {'task': 'fromfile', 'fromfile_datapath': fp, 'verbose': True}

            # Invert the logic of the assertion: either no log is captured
            # (assertLogs fails) or none of them mentions 'long episode'.
            with self.assertRaises(self.failureException):
                with self.assertLogs(logger=logging.logger, level='DEBUG') as cm:
                    testing_utils.display_data(opt)
                    self.assertTrue(
                        any('long episode' in line for line in cm.output)
                    )
示例#8
0
    def test_display_data_with_prepend_gold(self):
        """
        Check that knowledge-span markers are balanced in every output.

        Walks the whitespace-separated tokens of each display_data output:
        a KNOWLEDGE_TOKEN must not appear while a span is already open, an
        END_KNOWLEDGE_TOKEN must close an open span, and no span may remain
        open at the end of an output.
        """
        opt = setup_args().parse_args(
            ['--task', 'wizard_of_internet:WizardDialogGoldKnowledgeTeacher']
        )
        for out_type in display_data(opt):
            span_open = False
            # str.split() with no argument already yields stripped,
            # non-empty tokens.
            for token in out_type.split():
                if token == CONST.KNOWLEDGE_TOKEN:
                    self.assertFalse(span_open)
                    span_open = True
                elif token == CONST.END_KNOWLEDGE_TOKEN:
                    self.assertTrue(span_open)
                    span_open = False

            self.assertFalse(span_open)
    def test_dataset_integrity(self):
        """
        Check that the self_feeding:all task loads with the expected contents.
        """
        outputs = testing_utils.display_data({'task': 'self_feeding:all'})
        train_text, valid_text, test_text = outputs

        # Train output: a known utterance and the episode/example totals.
        self.assertIn("i am spending time with my 4 sisters", train_text)
        self.assertIn(
            '193777 episodes with a total of 193777 examples', train_text
        )

        # Valid output: example count.
        self.assertIn('3500 examples', valid_text)
        # Test output: example count.
        self.assertIn('7801 examples', test_text)
示例#10
0
 def test_display_data(self):
     """
     Smoke test: display_data runs on the wizard_of_internet task.
     """
     opt = setup_args().parse_args(['--task', 'wizard_of_internet'])
     # No assertions; the test passes if display_data does not raise.
     display_data(opt)