Пример #1
0
    def test_label_to_text_teacher(self):
        """
        Compare the label-to-text wrapper against the teacher it wraps.

        Both worlds are stepped in lockstep for five examples. Each wrapped
        example must carry the plain example's first label as its text, and
        its own labels must be a single empty string.
        """

        def _build_world(defaults):
            # Parse an empty argv so only the supplied defaults take effect.
            arg_parser = setup_args()
            arg_parser.set_defaults(**defaults)
            world_opt = arg_parser.parse_args([])
            return create_task(world_opt, RepeatLabelAgent(world_opt))

        # Regular teacher
        regular_world = _build_world({'task': 'integration_tests:multiturn'})

        # Label-to-text teacher wrapping the same task
        label_to_text_world = _build_world({
            'task': 'wrapper:labelToTextTeacher',
            'wrapper_task': 'integration_tests:multiturn',
        })

        for _ in range(5):
            regular_world.parley()
            regular_example = regular_world.get_acts()[0]
            label_to_text_world.parley()
            label_to_text_example = label_to_text_world.get_acts()[0]

            expected_text = regular_example['labels'][0]
            self.assertEqual(label_to_text_example['text'], expected_text)
            self.assertEqual(label_to_text_example['labels'], [''])
    def test_counts(self):
        """
        Check the episode and example counts of blended_skill_talk per datatype.
        """
        # (datatype, expected episodes, expected examples)
        expected_counts = (
            ('train', 4819, 27018),
            ('valid', 1009, 5651),
            ('test', 980, 5482),
        )

        with testing_utils.tempdir() as data_path:
            for datatype, num_episodes, num_examples in expected_counts:
                parser = setup_args()
                parser.set_defaults(
                    datatype=datatype,
                    task='blended_skill_talk',
                    datapath=data_path,
                )
                opt = parser.parse_args([])
                world = create_task(opt, RepeatLabelAgent(opt))
                teacher = world.get_task_agent()
                self.assertEqual(teacher.num_episodes(), num_episodes)
                self.assertEqual(teacher.num_examples(), num_examples)
Пример #3
0
def display_data(opt):
    """
    Run display_data once per split and collect what each run prints.

    :param opt: mapping of options, applied to the parser through set_params
    :return: (stdout_train, stdout_valid, stdout_test)
    :rtype: (str, str, str)
    """
    import parlai.scripts.display_data as dd

    parser = dd.setup_args()
    parser.set_params(**opt)
    popt = parser.parse_args([])

    # The same opt object is reused; only its datatype changes between runs.
    captured = []
    for split in ('train', 'valid', 'test'):
        with capture_output() as split_output:
            popt['datatype'] = split + ':stream'
            dd.display_data(popt)
        captured.append(split_output)

    return tuple(buffer.getvalue() for buffer in captured)
Пример #4
0
    def test_counts(self):
        """
        Check episode and example counts for each style_gen teacher and datatype.
        """
        # teacher name -> [(datatype, expected episodes, expected examples), ...]
        expected = {
            'LabeledBlendedSkillTalk': [
                ('train', 4819, 27018),
                ('valid', 1009, 5651),
                ('test', 980, 5482),
            ],
            'LabeledConvAI2PersonaTopicifier': [
                ('train', 17878, 131438),
                ('valid', 1000, 7801),
                ('test', 1000, 7801),
            ],
            'LabeledEDPersonaTopicifier': [
                ('train', 39057, 64636),
                ('valid', 2769, 5738),
                ('test', 2547, 5259),
            ],
            'LabeledWoWPersonaTopicifier': [
                ('train', 18430, 74092),
                ('valid', 981, 3939),
                ('test', 965, 3865),
            ],
        }

        with testing_utils.tempdir() as data_path:
            for teacher_name, splits in expected.items():
                for datatype, num_episodes, num_examples in splits:
                    parser = setup_args()
                    parser.set_defaults(
                        task=f'style_gen:{teacher_name}',
                        datatype=datatype,
                        datapath=data_path,
                    )
                    opt = parser.parse_args([])
                    world = create_task(opt, RepeatLabelAgent(opt))
                    teacher = world.get_task_agent()
                    self.assertEqual(teacher.num_episodes(), num_episodes)
                    self.assertEqual(teacher.num_examples(), num_examples)
Пример #5
0
 def setUpClass(cls):
     """
     Run display_data on a single example using END2END_OPTIONS as defaults.

     Per the original note, this is done up front so downloads happen here.
     """
     arg_parser = display_data.setup_args()
     arg_parser.set_defaults(**END2END_OPTIONS)
     run_opt = arg_parser.parse_args([], print_args=False)
     # One example is enough for this warm-up run.
     run_opt['num_examples'] = 1
     display_data.display_data(run_opt)
Пример #6
0
 def setUpClass(cls):
     """
     Set up the test by downloading the model/data.

     Runs display_data on a single example with MODEL_OPTIONS applied as
     parser defaults, inside testing_utils.capture_output().
     """
     with testing_utils.capture_output():
         parser = display_data.setup_args()
         parser.set_defaults(**MODEL_OPTIONS)
         # Pass an explicit empty argv: with no argument list the parser falls
         # back to sys.argv, which under a test runner holds the runner's own
         # command-line flags rather than options meant for this parser.
         opt = parser.parse_args([], print_args=False)
         opt['num_examples'] = 1
         display_data.display_data(opt)
Пример #7
0
 def setUpClass(cls):
     """
     Run display_data verbosely on a single example with END2END_OPTIONS.

     Per the original note, this is done up front so downloads happen here.
     """
     arg_parser = display_data.setup_args()
     arg_parser.set_defaults(**END2END_OPTIONS)
     run_opt = arg_parser.parse_args([])
     # Overrides applied after parsing, in the same order as before.
     run_opt['num_examples'] = 1
     run_opt['verbose'] = True
     display_data.display_data(run_opt)
Пример #8
0
 def setUpClass(cls):
     """
     Run display_data on a single example with END2END_OPTIONS as defaults.

     Per the original note, this is done up front so downloads happen here.
     The run happens inside testing_utils.capture_output().
     """
     with testing_utils.capture_output():
         parser = display_data.setup_args()
         parser.set_defaults(**END2END_OPTIONS)
         # Pass an explicit empty argv: with no argument list the parser falls
         # back to sys.argv, which under a test runner holds the runner's own
         # command-line flags rather than options meant for this parser.
         opt = parser.parse_args([], print_args=False)
         opt['num_examples'] = 1
         display_data.display_data(opt)
Пример #9
0
 def test_display_data(self):
     """
     Run display_data over the ApprenticeDialogTeacher of wizard_of_internet.
     """
     cli_args = [
         '--task',
         'wizard_of_internet:ApprenticeDialogTeacher',
         '--num-examples',
         '100000',
     ]
     opt = setup_args().parse_args(cli_args)
     display_data(opt)
Пример #10
0
 def setUpClass(cls):
     """
     Set up the test by downloading the model/data.

     Runs display_data on a single example with MODEL_OPTIONS as defaults.
     """
     arg_parser = display_data.setup_args()
     arg_parser.set_defaults(**MODEL_OPTIONS)
     run_opt = arg_parser.parse_args([])
     # One example is enough for this warm-up run.
     run_opt['num_examples'] = 1
     display_data.display_data(run_opt)
Пример #11
0
def display_data(opt):
    """
    Run through a display data run.

    :param opt: mapping of options, applied to the display_data parser via
        set_params
    :return: (stdout_train, stdout_valid, stdout_test)
    :rtype: (str, str, str)
    """
    # NOTE(review): as visible here, the body stops after configuring the
    # parser; no parse, display call, or return statement is present even
    # though the docstring promises a tuple. The remainder appears to be
    # missing from this copy -- confirm against the full source.
    import parlai.scripts.display_data as dd

    parser = dd.setup_args()
    parser.set_params(**opt)
def setup_image_context_args():
    """
    Build the parser from setup_args() plus an ``--image-context-path`` option.

    The option defaults to ``task_config/image_contexts`` inside the directory
    that contains the ``run`` module.

    :return: the extended parser
    """
    task_parser = setup_args()

    run_dir = os.path.dirname(run.__file__)
    default_image_context_path = os.path.join(
        run_dir, 'task_config', 'image_contexts'
    )

    image_context_option = {
        'type': str,
        'default': default_image_context_path,
        'help': 'Save path for image context file',
    }
    task_parser.add_argument('--image-context-path', **image_context_option)
    return task_parser
Пример #13
0
    def test_safe_personas(self):
        """
        Check how many personas load with and without ``safe_personas_only``.
        """
        base_kwargs = Opt({'datatype': 'train', 'task': 'blended_skill_talk'})

        # (safe_personas_only flag, expected number of personas)
        expected_counts = ((False, 4819), (True, 3890))

        for safe_personas_only, count in expected_counts:
            parser = setup_args()
            parser.set_defaults(
                **base_kwargs, safe_personas_only=safe_personas_only
            )
            opt = parser.parse_args([])
            self.assertEqual(len(_load_personas(opt)), count)
Пример #14
0
 def test_gold_knowledge_teacher(self):
     """
     Run display_data over GoldKnowledgeTeacher with only the last turn of
     history and personas included.
     """
     cli_args = [
         '--task', 'wizard_of_internet:GoldKnowledgeTeacher',
         '--dialog-history', 'onlylast',
         '--include-persona', 'true',
         '--num-examples', '100000',
     ]
     opt = setup_args().parse_args(cli_args)
     display_data(opt)
Пример #15
0
 def test_gold_doc_titles_teacher(self):
     """
     Run display_data over GoldDocTitlesTeacher with full dialog history and
     personas excluded.
     """
     cli_args = [
         '--task', 'wizard_of_internet:GoldDocTitlesTeacher',
         '--dialog-history', 'full',
         '--include-persona', 'false',
         '--num-examples', '100000',
     ]
     opt = setup_args().parse_args(cli_args)
     display_data(opt)
Пример #16
0
    def _run_display_test(self, kwargs):
        """
        Build a task from ``kwargs``, run ``display`` and check the printed
        output contains the loaded-episodes/examples banner for that task.

        :param kwargs: parser defaults applied before parsing an empty argv
        """
        with testing_utils.capture_output() as stdout:
            arg_parser = setup_args()
            arg_parser.set_defaults(**kwargs)
            opt = arg_parser.parse_args([])
            world = create_task(opt, RepeatLabelAgent(opt))
            display(opt)

        str_output = stdout.getvalue()
        banner_template = '[ loaded {} episodes with a total of {} examples ]'
        expected_banner = banner_template.format(
            world.num_episodes(), world.num_examples()
        )
        banner_found = expected_banner in str_output
        failure_message = 'Wizard of Wikipedia failed with following args: {}'.format(
            opt
        )
        self.assertTrue(banner_found, failure_message)
Пример #17
0
    def setUp(self):
        """
        Create a one-turn self-chat world between a RepeatLabelAgent and its
        clone, and check that the created world is a SelfChatBaseWorld.
        """
        self_chat_defaults = {
            'interactive_mode': True,
            'task': 'self_chat',
            'selfchat_task': True,
            'selfchat_max_turns': 1,
        }
        arg_parser = setup_args()
        arg_parser.set_defaults(**self_chat_defaults)
        self.opt = arg_parser.parse_args([])

        first_agent = RepeatLabelAgent(self.opt)
        second_agent = first_agent.clone()

        self.world = create_task(self.opt, [first_agent, second_agent])
        self.assertIsInstance(self.world, SelfChatBaseWorld)
Пример #18
0
    def test_display_data_with_prepend_gold(self):
        """
        Check that knowledge spans in the displayed output are well formed.

        For every output produced by ``display_data``, a knowledge start token
        must never appear while a span is already open, an end token must only
        appear while a span is open, and no span may be left open at the end.
        """
        parser = setup_args()
        opt = parser.parse_args(
            ['--task', 'wizard_of_internet:WizardDialogGoldKnowledgeTeacher'])
        for out_type in display_data(opt):
            started_knowledge_span = False
            # split() with no separator already yields non-empty tokens with
            # no surrounding whitespace, so no extra strip/filter is needed.
            for token in out_type.split():
                if token == CONST.KNOWLEDGE_TOKEN:
                    self.assertFalse(started_knowledge_span)
                    started_knowledge_span = True
                elif token == CONST.END_KNOWLEDGE_TOKEN:
                    self.assertTrue(started_knowledge_span)
                    started_knowledge_span = False

            self.assertFalse(started_knowledge_span)
Пример #19
0
    def run_display_test(self, kwargs):
        """
        Build a task from ``kwargs``, run ``display`` with stdout redirected,
        and check the output contains the loaded-episodes/examples banner.

        :param kwargs: parser defaults applied before parsing
        """
        f = io.StringIO()
        with redirect_stdout(f):
            parser = setup_args()
            parser.set_defaults(**kwargs)
            # Parse an explicit empty argv: with no argument list the parser
            # falls back to sys.argv, which under a test runner holds the
            # runner's own flags instead of options meant for this parser.
            opt = parser.parse_args([])
            agent = RepeatLabelAgent(opt)
            world = create_task(opt, agent)
            display(opt)

        str_output = f.getvalue()
        expected_banner = '[ loaded {} episodes with a total of {} examples ]'.format(
            world.num_episodes(), world.num_examples())
        # assertIn reports both operands on failure, unlike assertTrue(a in b).
        self.assertIn(
            expected_banner,
            str_output,
            'Wizard of Wikipedia failed with following args: {}'.format(opt))
Пример #20
0
 def _split_type_teacher(
     split_type: str,
 ) -> CMUDocumentGroundedConversationsTeacher:
     """
     Create the cmu_dog validation teacher for the given split type.

     :param split_type: value passed as the ``cmu_dog_split_type`` default
     :return: the first agent created for the task, asserted to be a
         CMUDocumentGroundedConversationsTeacher
     """
     arg_parser = setup_args()
     arg_parser.set_defaults(
         task='cmu_dog',
         datatype='valid',
         cmu_dog_split_type=split_type,
         datapath=data_path,
     )
     opt = arg_parser.parse_args([])

     agents = create_task_agent_from_taskname(opt)
     # The asserts both validate and narrow the types of what was created.
     assert isinstance(agents, List)
     task = agents[0]
     assert isinstance(task, CMUDocumentGroundedConversationsTeacher)
     return task
Пример #21
0
    def test_counts(self):
        """
        Check that light_dialog's training episode count scales with
        ``light_percent_train_exs``: for each proportion the teacher must
        report ``int(NUM_EPS * proportion)`` episodes.
        """
        with testing_utils.tempdir() as tmpdir:
            for proportion in (0.1, 0.5, 1.0):
                parser = setup_args()
                parser.set_defaults(
                    datatype='train',
                    task='light_dialog',
                    datapath=tmpdir,
                    light_percent_train_exs=proportion,
                )
                opt = parser.parse_args([])
                world = create_task(opt, RepeatLabelAgent(opt))
                teacher = world.get_task_agent()
                expected_episodes = int(NUM_EPS * proportion)
                self.assertEqual(teacher.num_episodes(), expected_episodes)
Пример #22
0
    def test_share(self, mock_load_personas):
        """
        Check that cloning an InteractiveWorld keeps the original personas.

        The mocked loader returns a known persona list while the first world
        is built and ``None`` afterwards; the clone's ``contexts_data`` must
        still equal the known list.

        :param mock_load_personas: mock standing in for the persona loader
        """
        test_personas = ['your persona:I live on a pirate\'s shoulder']
        with testing_utils.tempdir() as data_path:
            mock_load_personas.return_value = test_personas

            arg_parser = setup_args()
            arg_parser.set_defaults(
                task='blended_skill_talk',
                datapath=data_path,
                interactive_task=True,
                interactive_mode=True,
            )
            opt = arg_parser.parse_args([])

            first_agent = RepeatLabelAgent(opt)
            second_agent = first_agent.clone()
            world = InteractiveWorld(opt=opt, agents=[first_agent, second_agent])

            # We should not reload personas on share
            mock_load_personas.return_value = None
            new_world = world.clone()

            self.assertEqual(new_world.contexts_data, test_personas)
Пример #23
0
 def test_shuffle(self):
     """
     Check that the dataloader is initialized with the correct data sampler.

     For every combination of datatype and shuffle flag, the sampler must be
     exactly ``Sequential`` when the datatype is ordered, is streamed without
     shuffling, or is not a training split; otherwise exactly
     ``RandomSampler``.
     """
     dts = ['train', 'valid', 'test']
     exts = ['', ':stream', ':ordered', ':stream:ordered']
     shuffle_opts = [False, True]
     task = 'babi:task1k:1'
     for dt in dts:
         for ext in exts:
             datatype = dt + ext
             for shuffle in shuffle_opts:
                 opt_defaults = {
                     'pytorch_teacher_task': task,
                     'datatype': datatype,
                     'shuffle': shuffle
                 }
                 print('Testing {} with args {}'.format(task, opt_defaults))
                 f = io.StringIO()
                 with redirect_stdout(f):
                     parser = setup_args()
                     parser.set_defaults(**opt_defaults)
                     # Parse an explicit empty argv: with no argument list
                     # the parser falls back to sys.argv, which under a test
                     # runner holds the runner's own flags.
                     opt = parser.parse_args([])
                     teacher = create_task_agent_from_taskname(opt)[0]

                 expect_sequential = (
                     'ordered' in datatype
                     or ('stream' in datatype and not opt.get('shuffle'))
                     or 'train' not in datatype
                 )
                 expected_sampler = (
                     Sequential if expect_sequential else RandomSampler
                 )
                 # Exact type identity is intended, so compare type() itself.
                 self.assertIs(
                     type(teacher.pytorch_dataloader.sampler),
                     expected_sampler,
                     'PytorchDataTeacher failed with args: {}'.format(opt))
Пример #24
0
    def test_check_examples(self):
        """
        Compare specific blended_skill_talk messages against stored fixtures.

        First checks entry 0 of episode 1 of the train set for full equality,
        then checks entry 1 of episode 1 for train, valid and test. For the
        latter, the key sets must match, label candidates are checked only by
        count and first/last element, and all other fields must be equal.
        """

        with testing_utils.tempdir() as tmpdir:
            data_path = tmpdir

            # Check the first entry (entry_idx==0) of the second episode for the train
            # set, in order to check the context for an episode that has a WoW topic
            # string
            train_opt_and_example = (
                {
                    'datatype': 'train'
                },
                {
                    'text':
                    "your persona: i just bought a new house with my partner.\nyour persona: i like to make my own coffee.\nLasagne\nOh, I love lasagne. I make my own noodles as well as the sauce. \nWow.  That's amazing.  I read where lasagne originated in Italy during the Middle Ages.  \nOh really!? That is interesting. I am actually italian myself.",
                    'labels': [
                        "Awesome. Me and my partner just bought a house. I can't wait to cook in my kitchen."
                    ],
                    'context_dataset':
                    'wizard_of_wikipedia',
                    'free_message':
                    'Oh really!? That is interesting. I am actually italian myself.',
                    'convai2':
                    'yum . i like to make lasagna and it s so good',
                    'empathetic_dialogues':
                    'Cool. I love italian. Real italian.',
                    'wizard_of_wikipedia':
                    "Wow.  That's amazing.  I read where lasagne originated in Italy during the Middle Ages.",
                    'guided_chosen_suggestion':
                    ' ',
                    'episode_done':
                    False,
                },
            )
            all_kwargs = {
                **train_opt_and_example[0],
                'task': 'blended_skill_talk',
                'datapath': data_path,
            }
            parser = setup_args()
            parser.set_defaults(**all_kwargs)
            opt = parser.parse_args([])
            agent = RepeatLabelAgent(opt)
            teacher = create_task(opt, agent).get_task_agent()
            # This first check requires the whole message to equal the fixture.
            self.assertEqual(teacher.get(episode_idx=1, entry_idx=0),
                             train_opt_and_example[1])

            # Check the second entry (entry_idx==1) of the second episode for each dataset
            # Each item pairs extra parser defaults with the expected message.
            # A 'label_candidates' entry in the expected message is not the
            # candidate list itself but a summary: count, first and last item.
            opts_and_examples = [
                (
                    {
                        'datatype': 'train'
                    },
                    {
                        'text':
                        'Moving in a new place can be a lot of fun. Are you a good cook?',
                        'labels': [
                            'I like to think so. I love to make coffee for an after dinner treat too.'
                        ],
                        'context_dataset':
                        'wizard_of_wikipedia',
                        'free_message':
                        'Moving in a new place can be a lot of fun. Are you a good cook?',
                        'convai2':
                        'yes ! trying to master lasagna .',
                        'empathetic_dialogues':
                        "See. I'm not a great cook.",
                        'wizard_of_wikipedia':
                        'With the training and skills I have, I can cook pretty much anything.',
                        'guided_chosen_suggestion':
                        ' ',
                        'episode_done':
                        False,
                    },
                ),
                (
                    {
                        'datatype': 'valid'
                    },
                    {
                        'text':
                        'I like to go mountain biking with my friends.',
                        'labels': [
                            "I have never done that.  Not really the physical activity type, but I'd be willing to give it a try, I guess"
                        ],
                        'context_dataset':
                        'empathetic_dialogues',
                        'free_message':
                        'I like to go mountain biking with my friends.',
                        'convai2':
                        "that's so cool , i love biking",
                        'empathetic_dialogues':
                        "Ive never been on any but I'll try it out",
                        'wizard_of_wikipedia':
                        "That's interesting!  Most mountain biking is in the categories of Trail and Cross Country riding styles",
                        'guided_chosen_suggestion':
                        '',
                        'label_candidates': {
                            'num_cands': 100,
                            'first':
                            'i work as a vet so no days off over here!',
                            'last': 'And what else? ',
                        },
                        'episode_done':
                        False,
                    },
                ),
                (
                    {
                        'datatype': 'test'
                    },
                    {
                        'text':
                        "He eats insects, leaves and sun flower seeds. It's easy. They don't need walking and cleanup is simple. Do you have any pets?",
                        'labels': [
                            'No, not at the moment.  I have 3 girls and they are enough trouble! LOL'
                        ],
                        'context_dataset':
                        'empathetic_dialogues',
                        'free_message':
                        "He eats insects, leaves and sun flower seeds. It's easy. They don't need walking and cleanup is simple. Do you have any pets?",
                        'convai2':
                        "no , i don't have any pets either .",
                        'empathetic_dialogues':
                        'I do not just a cat',
                        'wizard_of_wikipedia':
                        "I actually do.  He is ten years old and loves to be outside.  He's fat and furry.",
                        'guided_chosen_suggestion':
                        '',
                        'label_candidates': {
                            'num_cands':
                            100,
                            'first':
                            "Wow, engineering, sounds impressive.  I'm sure the income will be awesome.",
                            'last':
                            'but the worst part is you have to clean every day and keep the flat tidy all the time.  ',
                        },
                        'episode_done':
                        False,
                    },
                ),
            ]
            for kwargs, example in opts_and_examples:
                all_kwargs = {
                    **kwargs,
                    'task': 'blended_skill_talk',
                    'datapath': data_path,
                }
                parser = setup_args()
                parser.set_defaults(**all_kwargs)
                opt = parser.parse_args([])
                agent = RepeatLabelAgent(opt)
                teacher = create_task(opt, agent).get_task_agent()
                actual_message = teacher.get(episode_idx=1, entry_idx=1)

                # Check for field equality
                self.assertEqual(set(actual_message.keys()),
                                 set(example.keys()))

                # Check label candidates
                if 'label_candidates' in example:
                    params = example['label_candidates']
                    self.assertEqual(len(actual_message['label_candidates']),
                                     params['num_cands'])
                    self.assertEqual(actual_message['label_candidates'][0],
                                     params['first'])
                    self.assertEqual(actual_message['label_candidates'][-1],
                                     params['last'])

                # Check other fields
                for key in [
                        k for k in example.keys() if k != 'label_candidates'
                ]:
                    self.assertEqual(example[key], actual_message[key])
Пример #25
0
 def test_display_data(self):
     """
     Run display_data over the default wizard_of_internet task.
     """
     cli_args = ['--task', 'wizard_of_internet']
     opt = setup_args().parse_args(cli_args)
     display_data(opt)
Пример #26
0
    def test_check_examples(self):
        """
        Compare the first message of each persona-topicifier teacher against a
        stored fixture.

        For each task string, the teacher is created with datatype
        'train:ordered' and entry 0 of episode 0 is fetched. Every key of the
        expected message except 'label_candidates' must match the actual
        message; keys present only in the actual message are not checked.
        """

        # Define all pairs of task strings and examples
        tasks_and_messages = [
            (
                "blended_skill_talk:ConvAI2PersonaTopicifier",
                {
                    'text':
                    "your persona: i like to remodel homes.\nyour persona: i like to go hunting.\nyour persona: i like to shoot a bow.\nyour persona: my favorite holiday is halloween.\nNicholas Sparks\nhi , how are you doing ? i'm getting ready to do some cheetah chasing to stay in shape .",
                    'labels':
                    ('you must be very fast . hunting is one of my favorite hobbies .',
                     ),
                    'reward':
                    0,
                    'label_candidates': (
                        'my mom was single with 3 boys , so we never left the projects .',
                        'i try to wear all black every day . it makes me feel comfortable .',
                        'well nursing stresses you out so i wish luck with sister',
                        'yeah just want to pick up nba nfl getting old',
                        'i really like celine dion . what about you ?',
                        'no . i live near farms .',
                        "i wish i had a daughter , i'm a boy mom . they're beautiful boys though still lucky",
                        'yeah when i get bored i play gone with the wind my favorite movie .',
                        "hi how are you ? i'm eatingdinner with my hubby and 2 kids .",
                        'were you married to your high school sweetheart ? i was .',
                        'that is great to hear ! are you a competitive rider ?',
                        "hi , i'm doing ok . i'm abanker . how about you ?",
                        "i'm 5 years old",
                        'hi there . how are you today ?',
                        'i totally understand how stressful that can be .',
                        'yeah sometimes you do not know what you are actually watching',
                        'mother taught me to cook ! we are looking for an exterminator .',
                        'i enjoy romantic movie . what is your favorite season ? mine is summer .',
                        'editing photos takesa lot of work .',
                        'you must be very fast . hunting is one of my favorite hobbies .',
                    ),
                    'episode_done':
                    False,
                },
            ),
            (
                "blended_skill_talk:EDPersonaTopicifier",
                {
                    'situation':
                    'I remember going to the fireworks with my best friend. There was a lot of people, but it only felt like us in the world.',
                    'emotion':
                    'sentimental',
                    'prepend_ctx':
                    None,
                    'prepend_cand':
                    None,
                    'deepmoji_ctx':
                    None,
                    'deepmoji_cand':
                    None,
                    'text':
                    'your persona: people hate that i obsess about the poor.\nyour persona: i like to make cellphone apps that would help heal our world.\nyour persona: i like to watch people pray together.\nyour persona: people don t like me too much but i like them anyways.\nAndroid (operating system)#Applications\nI remember going to see the fireworks with my best friend. It was the first time we ever spent time alone together. Although there was a lot of people, we felt like the only people in the world.',
                    'labels': [
                        'Was this a friend you were in love with, or just a best friend?'
                    ],
                    'episode_done':
                    False,
                },
            ),
            (
                "blended_skill_talk:WoWPersonaTopicifier",
                {
                    'id':
                    'WizardDialogKnowledgeTeacher',
                    'text':
                    "your persona: not a day goes by that i don't drink four mountain dews.\nyour persona: i enjoy movies about aliens invading the earth.\nyour persona: my favorite hobby is chess.\nyour persona: i just dyed my hair hot pink with purple highlights.\nScience fiction\n",
                    'labels': [
                        "I think science fiction is an amazing genre for anything. Future science, technology, time travel, FTL travel, they're all such interesting concepts."
                    ],
                    'chosen_topic':
                    'Science fiction',
                    'episode_done':
                    False,
                    'label_candidates': [],
                    'knowledge':
                    'Science fiction Science fiction (often shortened to SF or sci-fi) is a genre of speculative fiction, typically dealing with imaginative concepts such as futuristic science and technology, space travel, time travel, faster than light travel, parallel universes, and extraterrestrial life.\nScience fiction Science fiction often explores the potential consequences of scientific and other innovations, and has been called a "literature of ideas".\nScience fiction It usually avoids the supernatural, unlike the related genre of fantasy.\nScience fiction Historically, science-fiction stories have had a grounding in actual science, but now this is only expected of hard science fiction.\nScience fiction Science fiction is difficult to define, as it includes a wide range of subgenres and themes.\nScience fiction Hugo Gernsback, who suggested the term "scientifiction" for his "Amazing Stories" magazine, wrote: "By \'scientifiction\' I mean the Jules Verne, H. G. Wells and Edgar Allan Poe type of story—a charming romance intermingled with scientific fact and prophetic vision... Not only do these amazing tales make tremendously interesting reading—they are always instructive.\nScience fiction They supply knowledge... in a very palatable form... New adventures pictured for us in the scientifiction of today are not at all impossible of realization tomorrow...\n',
                    'title':
                    'Science fiction',
                    'checked_sentence':
                    'Science fiction (often shortened to SF or sci-fi) is a genre of speculative fiction, typically dealing with imaginative concepts such as futuristic science and technology, space travel, time travel, faster than light travel, parallel universes, and extraterrestrial life.',
                },
            ),
        ]
        for task_string, desired_message in tasks_and_messages:

            # Get message
            kwargs = {'task': task_string, 'datatype': 'train:ordered'}
            parser = setup_args()
            parser.set_defaults(**kwargs)
            opt = parser.parse_args([])
            agent = RepeatLabelAgent(opt)
            teacher = create_task(opt, agent).get_task_agent()
            actual_message = teacher.get(episode_idx=0, entry_idx=0)

            # Print each key before asserting on it, so the printed output
            # ends with the key being compared when an assertion fails.
            print(f'\nChecking {task_string}:')
            for key in desired_message.keys():
                if key in ['label_candidates']:
                    # These are often created randomly and thus will vary
                    continue
                print(key)
                self.assertEqual(desired_message[key], actual_message[key])
            print('')
Пример #27
0
#!/usr/bin/env python3

# Copyright (c) 2017-present, Facebook, Inc.
# All rights reserved.
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree. An additional grant
# of patent rights can be found in the PATENTS file in the same directory.
"""Basic example which iterates through the tasks specified and prints them out.
Used for verification of data loading and iteration.

For more documentation, see parlai.scripts.display_data.
"""

from parlai.scripts.display_data import display_data, setup_args
import random

if __name__ == '__main__':
    # Seed Python's `random` module with a fixed value before anything runs.
    random.seed(42)

    # Parse the command line with the display_data parser and hand the
    # resulting options straight to display_data.
    display_data(setup_args().parse_args())
Пример #28
0
    def test_custom_eval(self):
        """
        Test whether custom evaluation works.

        Builds a ``wizard_of_wikipedia`` teacher (``valid`` datatype,
        ``chosen_sent`` label type), feeds hand-made teacher actions and model
        responses to ``teacher.custom_evaluation`` and checks the metrics
        returned by ``teacher.report()``:

        1. gold sentence/title ranked first -> every retrieval metric is 1;
        2. gold sentence/title ranked last of five candidates -> the ``r@5``
           metrics are 1 and the ``r@1`` metrics are 0;
        3. gold sentence/title absent from the candidates -> every retrieval
           metric is 0;
        4. ``response`` label type, response text equal to the checked
           sentence -> ``knowledge_f1`` is 1;
        5. ``response`` label type, unrelated response text ->
           ``knowledge_f1`` is 0.
        """
        with testing_utils.capture_output():
            parser = setup_args()
            opt = parser.parse_args([
                '--task',
                'wizard_of_wikipedia',
                '--datatype',
                'valid',
                '--label-type',
                'chosen_sent',
            ])
            teacher = create_task_agent_from_taskname(opt)[0]

        title = 'Gardening'
        # Four single-character distractor candidates: ['f', 'o', 'u', 'r'].
        cands = list('four')

        text = "Gardening\nI like Gardening, even when I've only been doing it for a short time."
        response = 'I live on a farm, we garden all year long, it is very relaxing.'
        checked_sent = (
            'Gardening is considered by many people to be a relaxing activity.'
        )
        checked_sent_label = f'{title}{TOKEN_KNOWLEDGE}{checked_sent}'

        retrieval_metric_keys = [
            'passage_r@1', 'passage_r@5', 'title_r@1', 'title_r@5'
        ]

        # Teacher action and model responses for the `chosen_sent` label type.
        chosen_sent_teacher_action = Message({
            'text': text,
            'labels': [checked_sent_label],
            'title': [title],
            'checked_sentence': [checked_sent],
        })
        # Gold title/sentence are the first candidates.
        correct_chosen_sent_response = Message({
            'text': checked_sent_label,
            'title_candidates': [title] + cands,
            'text_candidates': [checked_sent_label] + cands,
        })
        # Gold title/sentence are the fifth (last) candidates.
        top5_chosen_sent_response = Message({
            'text': f'hello{TOKEN_KNOWLEDGE}goodbye',
            'title_candidates': cands + [title],
            'text_candidates': cands + [checked_sent_label],
        })
        # Gold title/sentence do not appear among the candidates at all.
        incorrect_chosen_sent_response = Message({
            'text': f'hello{TOKEN_KNOWLEDGE}goodbye',
            'title_candidates': cands,
            'text_candidates': cands,
        })

        # Teacher action and model responses for the `response` label type.
        response_teacher_action = Message({
            'text': text,
            'labels': [response],
            'checked_sentence': checked_sent
        })
        high_f1_response = Message({'text': checked_sent})
        low_f1_response = Message({'text': 'incorrect'})

        # 1) Test with correct top sentence
        teacher.reset_metrics()
        teacher.custom_evaluation(
            chosen_sent_teacher_action,
            [checked_sent_label],
            correct_chosen_sent_response,
        )
        report = teacher.report()
        for k in retrieval_metric_keys:
            assert k in report
            assert report[k] == AverageMetric(1)

        # 2) Test with top sentence in top 5
        teacher.reset_metrics()
        teacher.custom_evaluation(
            chosen_sent_teacher_action,
            [checked_sent_label],
            top5_chosen_sent_response,
        )
        report = teacher.report()
        for k in retrieval_metric_keys:
            assert k in report
            # Compute the expected metric first. Written inline,
            # `assert a == b if cond else c` parses as
            # `assert ((a == b) if cond else c)`, so the r@1 keys would only
            # test the truthiness of `c` and never look at the report value.
            expected = AverageMetric(1) if '5' in k else AverageMetric(0)
            assert report[k] == expected

        # 3) Test with no top sentences
        teacher.reset_metrics()
        teacher.custom_evaluation(
            chosen_sent_teacher_action,
            [checked_sent_label],
            incorrect_chosen_sent_response,
        )
        report = teacher.report()
        for k in retrieval_metric_keys:
            assert k in report
            assert report[k] == AverageMetric(0)

        # 4) Test knowledge f1 with high f1
        # Switch the same teacher over to the `response` label type.
        teacher.label_type = 'response'
        teacher.reset_metrics()
        teacher.custom_evaluation(response_teacher_action, [response],
                                  high_f1_response)
        report = teacher.report()
        assert 'knowledge_f1' in report
        assert report['knowledge_f1'] == F1Metric(1)

        # 5) Test knowledge f1 with low f1
        teacher.reset_metrics()
        teacher.custom_evaluation(response_teacher_action, [response],
                                  low_f1_response)
        report = teacher.report()
        assert 'knowledge_f1' in report
        assert report['knowledge_f1'] == F1Metric(0)
Пример #29
0
    def test_check_examples(self):
        """
        Check the first entry served by three ``style_gen`` teachers.

        For each (teacher name, datatype, expected message) triple, a teacher
        is created for ``style_gen:<teacher name>`` with a temporary datapath,
        and its message at episode 0 / entry 0 is compared with the expected
        one: the key sets must match, ``label_candidates`` (when expected) is
        checked by length plus first and last element, and every other field
        must be equal.
        """

        with testing_utils.tempdir() as data_path:

            # Check the first entry for 3 sample files
            expected_first_entries = [
                (
                    'LabeledBlendedSkillTalk',
                    'train',
                    {
                        'id': 'internal:blended_skill_talk',
                        'text': "your persona: i've 2 kids.\nyour persona: i love flowers.\nI love live music, that's why I try to go to concerts\nI do too. Wat do you like?\nI like acting, I hope to be an actor, what about you?",
                        'labels': ['that is ok.  have any kids?'],
                        'context_dataset': 'empathetic_dialogues',
                        'free_turker_message': 'I like acting, I hope to be an actor, what about you?',
                        'guided_turker_chosen_suggestion': ' ',
                        'personality': 'Maternal (Mother-like)',
                        'episode_done': False,
                    },
                ),
                (
                    'LabeledConvAI2PersonaTopicifier',
                    'valid',
                    {
                        'id': 'internal:blended_skill_talk:ConvAI2PersonaTopicifierTeacher',
                        'text': "your persona: i read twenty books a year.\nyour persona: i'm a stunt double as my second job.\nyour persona: i only eat kosher.\nyour persona: i was raised in a single parent household.\nAlabama\nhello what are doing today ?",
                        'labels': [
                            'i am good , i just got off work and tired , i have two jobs .'
                        ],
                        'personality': 'Lazy',
                        'episode_done': False,
                        # Summary of the expected candidates, not the list itself.
                        'label_candidates': {
                            'num_cands': 20,
                            'first': 'oh really ? i am actually in high school and i am graduating as class of 2019 !',
                            'last': 'i am good , i just got off work and tired , i have two jobs .',
                        },
                    },
                ),
                (
                    'LabeledEDPersonaTopicifier',
                    'test',
                    {
                        'id': 'internal:blended_skill_talk:EDPersonaTopicifierTeacher',
                        'text': "your persona: my mom raised me by herself and taught me to play baseball.\nyour persona: i blog about salt water aquarium ownership.\nyour persona: i still love to line dry my clothes.\nyour persona: i am allergic to peanuts.\nyour persona: i'll one day own a ferret.\nMarine aquarium\nYeah about 10 years ago I had a horrifying experience. It was 100% their fault but they hit the water barrels and survived. They had no injuries but they almost ran me off the road.",
                        'labels': ['Did you suffer any injuries?'],
                        'situation': "I felt guilty when I was driving home one night and a person tried to fly into my lane, and didn't see me. I honked and they swerved back into their lane, slammed on their brakes, and hit the water cones.",
                        'emotion': 'guilty',
                        'prepend_ctx': 'None',
                        'prepend_cand': 'None',
                        'deepmoji_ctx': 'None',
                        'deepmoji_cand': 'None',
                        'personality': 'Curious',
                        'episode_done': False,
                        # Summary of the expected candidates, not the list itself.
                        'label_candidates': {
                            'num_cands': 100,
                            'first': 'I hope it goes well! If it makes you feel any better, most of them are probably just as nervous and are looking for any excuse to relax and let their guard down, too. Good luck',
                            'last': "I know how you feel.  I moved away from my family and friends this summer.  Do you have family nearby at all? I often feel lonely when I'm watching a movie by myself but then I remind myself that I'm loved by a lot of people.",
                        },
                    },
                ),
            ]
            for teacher_cls_name, fold, expected in expected_first_entries:
                # Build options for this teacher purely from parser defaults.
                parser = setup_args()
                parser.set_defaults(
                    task=f'style_gen:{teacher_cls_name}',
                    datatype=fold,
                    datapath=data_path,
                )
                opt = parser.parse_args([])
                world = create_task(opt, RepeatLabelAgent(opt))
                actual = world.get_task_agent().get(episode_idx=0, entry_idx=0)

                # Check for field equality
                self.assertEqual(set(actual.keys()), set(expected.keys()))

                # Check label candidates
                if 'label_candidates' in expected:
                    cand_spec = expected['label_candidates']
                    self.assertEqual(len(actual['label_candidates']),
                                     cand_spec['num_cands'])
                    self.assertEqual(actual['label_candidates'][0],
                                     cand_spec['first'])
                    self.assertEqual(actual['label_candidates'][-1],
                                     cand_spec['last'])

                # Check other fields
                for field, wanted in expected.items():
                    if field == 'label_candidates':
                        continue
                    self.assertEqual(wanted, actual[field])
Пример #30
0
 def setup_opt(opt):
     """
     Return parsed options built from ``dsd.setup_args()``.

     The entries of ``opt`` are applied to the parser through ``set_params``
     and an empty argument list is then parsed, so no command-line arguments
     are read.
     """
     arg_parser = dsd.setup_args()
     arg_parser.set_params(**opt)
     parsed_opt = arg_parser.parse_args([])
     return parsed_opt