Example #1
    def test_priorities(self):
        """ Tests component priorities. """
        # Instantiate.
        ConfigRegistry()._clear_registry()
        config = ConfigInterface()
        config.add_default_params({
            'bow_encoder2': {
                'type': 'BOWEncoder',
                'priority': 2.1
            },
            'bow_encoder1': {
                'type': 'BOWEncoder',
                'priority': 0.1
            }
        })
        pipe = PipelineManager('testpm', config)
        pipe.build(False)

        # Assert the right order of components.
        self.assertEqual(len(pipe), 2)
        self.assertEqual(pipe[0].name, 'bow_encoder1')
        self.assertEqual(pipe[1].name, 'bow_encoder2')


Example #2
    def test_create_subset_random_sampler_list_of_indices(self):
        """ Tests whther SubsetRandomSampler accepts 'indices' with the option 3: list of indices. """

        yaml_list = yaml.load('[0, 2, 5, 10]')
        config = ConfigInterface()
        config.add_default_params({'name': 'SubsetRandomSampler',
                                'indices': yaml_list})
        # Create the sampler.
        sampler = SamplerFactory.build(TestProblemMockup(), config)

        # Check number of samples.
        self.assertEqual(len(sampler), 4)
Example #3
    def test_create_subset_random_sampler_range_str(self):
        """ Tests whether SubsetRandomSampler accepts 'indices' option 2: a range given as a string. """

        range_str = '0, 20'
        config = ConfigInterface()
        config.add_default_params({'name': 'SubsetRandomSampler',
                                'indices': range_str})
        # Create the sampler.
        sampler = SamplerFactory.build(TestProblemMockup(), config)

        # Check number of samples.
        self.assertEqual(len(sampler), 20)
Example #4
    def test_create_subset_random_sampler_range(self):
        """ Tests whether SubsetRandomSampler accepts 'indices' option 1: a range. """

        indices = range(20)
        config = ConfigInterface()
        config.add_default_params({
            'type': 'SubsetRandomSampler',
            'indices': indices
        })
        # Create the sampler.
        sampler = SamplerFactory.build(TestTaskMockup(), config, "training")

        # Check number of samples.
        self.assertEqual(len(sampler), 20)
Example #5
    def test_default_params(self):
        config = ConfigInterface()
        # Add params - first method.
        config.add_default_params({'default_0': {'default_1': 'str'}})
        self.assertNotEqual(config['default_0'], None)
        self.assertEqual(config['default_0']['default_1'], 'str')

        # Remove params - first method.
        config.del_default_params(['default_0', 'default_1'])
        with self.assertRaises(KeyError):
            _ = config['default_0']['default_1']

        # Add params - second method.
        config['default_0'].add_default_params({'default_2': 'str'})

        # Remove params - second method.
        config['default_0'].del_default_params('default_2')
        with self.assertRaises(KeyError):
            _ = config['default_0']['default_2']

        # Add 3rd parameter under 0.
        config['default_0'].add_default_params({'default_3': 'str'})

        # Remove the main section.
        config.del_default_params('default_0')
        with self.assertRaises(KeyError):
            _ = config['default_0']
Example #6
    def test_create_component_type(self):
        """ Tests whether component can be created when using only module name. """
        # Instantiate.
        ConfigRegistry()._clear_registry()
        config = ConfigInterface()
        config.add_default_params(
            {'bow_encoder': {
                'type': 'BOWEncoder',
                'priority': 1.2
            }})
        # Build object.
        pipe = PipelineManager('testpm', config)
        pipe.build(False)

        # Assert type.
        self.assertEqual(type(pipe[0]).__name__, "BOWEncoder")
Example #7
    def test_training_set(self):
        """
            Tests the training split.

            .. note::
                Test on real data is performed only if the json file '~/data/CLEVR_v1.0/questions/CLEVR_train_questions.json' is found.
        """
        # Empty config.
        config = ConfigInterface("CLEVR")
        config.add_config_params({"clevr_training": {"split": "training", "globals": {"image_height": "clevr_image_height", "image_width": "clevr_image_width"}}})

        # Check the existence of the training set.
        if False: #check_file_existence(path.expanduser('~/data/CLEVR_v1.0/questions'),'CLEVR_train_questions.json'):

            # Create object.
            clevr = CLEVR("clevr_training", config["clevr_training"])
            
            # Check dataset size.
            self.assertEqual(len(clevr), 699989)

            # Get sample.
            sample = clevr[0]

        else: 
            dataset_content = [{'image_index': 0, 'program': [{'inputs': [], 'function': 'scene', 'value_inputs': []}, {'inputs': [0], 'function': 'filter_size', 'value_inputs': ['large']}, 
            {'inputs': [1], 'function': 'filter_color', 'value_inputs': ['green']}, {'inputs': [2], 'function': 'count', 'value_inputs': []}, 
            {'inputs': [], 'function': 'scene', 'value_inputs': []}, {'inputs': [4], 'function': 'filter_size', 'value_inputs': ['large']}, 
            {'inputs': [5], 'function': 'filter_color', 'value_inputs': ['purple']}, {'inputs': [6], 'function': 'filter_material', 'value_inputs': ['metal']}, 
            {'inputs': [7], 'function': 'filter_shape', 'value_inputs': ['cube']}, {'inputs': [8], 'function': 'count', 'value_inputs': []}, 
            {'inputs': [3, 9], 'function': 'greater_than', 'value_inputs': []}], 'question_index': 0, 'image_filename': 'CLEVR_train_000000.png', 'question_family_index': 2,
            'split': 'train', 'answer': 'yes', 'question': 'Are there more big green things than large purple shiny cubes?'}]

            # Mock up the load_dataset method.
            with patch( "ptp.components.tasks.image_text_to_class.clevr.CLEVR.load_dataset", MagicMock( side_effect = [ dataset_content ] )):
                clevr = CLEVR("clevr_training", config["clevr_training"])

            # Mock up the get_image method.
            with patch( "ptp.components.tasks.image_text_to_class.clevr.CLEVR.get_image", MagicMock( side_effect = [ "0" ] )):
                sample = clevr[0]

        # Check sample.
        self.assertEqual(sample['indices'], 0)
        self.assertEqual(sample['image_ids'], 'CLEVR_train_000000.png')
        self.assertEqual(sample['question_type_ids'], 4)
        self.assertEqual(sample['question_type_names'], 'greater_than')
        self.assertEqual(sample['questions'], 'Are there more big green things than large purple shiny cubes?')
        self.assertEqual(sample['answers'], 'yes')
Example #8
    def __init__(self, *args, **kwargs):
        super(TestProblem, self).__init__(*args, **kwargs)

        # Overwrite abc abstract methods.
        MockupProblem.__abstractmethods__ = set()
        # Create mocked-up problem.
        config = ConfigInterface()
        self.problem = MockupProblem("test", config)
Example #9
    def __init__(self, *args, **kwargs):
        super(TestTask, self).__init__(*args, **kwargs)

        # Overwrite abc abstract methods.
        MockupTask.__abstractmethods__ = set()
        # Create mocked-up task.
        config = ConfigInterface()
        self.task = MockupTask("test", config)
Example #10
    def test_test_split(self):
        """
            Tests the test split.

            .. note::
                Test on real data is performed only if an adequate json source file is found.
        """
        # Empty config.
        config = ConfigInterface()
        config.add_config_params({"gqa_test": {"split": "test", "globals": {"image_height": "gqa_image_height", "image_width": "gqa_image_width"}}})
    
        # Check the existence of test set.
        if False: #check_file_existence(path.expanduser('~/data/gqa/questions1.2'),'test_all_questions.json'):

            # Create object.
            task = GQA("gqa_test", config["gqa_test"])
            
            # Check dataset size.
            self.assertEqual(len(task), 1340048)

            # Get sample.
            sample = task[0]

        else: 
            processed_dataset_content = [ {'sample_ids': '201971873', 'image_ids': 'n15740', 'questions': 'Is the blanket to the right of a pillow?', 'answers': '<UNK>', 'full_answers': '<UNK>'} ]

            # Mock up the load_dataset method.
            with patch( "ptp.components.tasks.image_text_to_class.gqa.GQA.load_dataset", MagicMock( side_effect = [ processed_dataset_content ] )):
                task = GQA("gqa_test", config["gqa_test"])

            # Mock up the get_image method.
            with patch( "ptp.components.tasks.image_text_to_class.gqa.GQA.get_image", MagicMock( side_effect = [ "0" ] )):
                sample = task[0]
        
        # Check sample.
        self.assertEqual(sample['indices'], 0)
        self.assertEqual(sample['sample_ids'], '201971873')
        self.assertEqual(sample['image_ids'], 'n15740')
        self.assertEqual(sample['questions'], 'Is the blanket to the right of a pillow?')
        self.assertEqual(sample['answers'], '<UNK>')
        self.assertEqual(sample['full_answers'], '<UNK>')
        

Example #11
    def test_test_set(self):
        """
            Tests the test split.

            .. note::
                Test on real data is performed only if the json file '~/data/CLEVR_v1.0/questions/CLEVR_test_questions.json' is found.
        """
        # Empty config.
        config = ConfigInterface()
        config.add_config_params({"clevr_test": {"split": "test", "globals": {"image_height": "clevr_image_height", "image_width": "clevr_image_width"}}})
    
        # Check the existence of test set.
        if False: #check_file_existence(path.expanduser('~/data/CLEVR_v1.0/questions'),'CLEVR_test_questions.json'):

            # Create object.
            clevr = CLEVR("clevr_test", config["clevr_test"])
            
            # Check dataset size.
            self.assertEqual(len(clevr), 149988)

            # Get sample.
            sample = clevr[0]

        else: 
            dataset_content = [{'image_index': 0, 'split': 'test', 'image_filename': 'CLEVR_test_000000.png', 'question_index': 0, 'question': 'Is there anything else that is the same shape as the small brown matte object?'}]

            # Mock up the load_dataset method.
            with patch( "ptp.components.tasks.image_text_to_class.clevr.CLEVR.load_dataset", MagicMock( side_effect = [ dataset_content ] )):
                clevr = CLEVR("clevr_test", config["clevr_test"])

            # Mock up the get_image method.
            with patch( "ptp.components.tasks.image_text_to_class.clevr.CLEVR.get_image", MagicMock( side_effect = [ "0" ] )):
                sample = clevr[0]

        # Check sample.
        self.assertEqual(sample['indices'], 0)
        self.assertEqual(sample['image_ids'], 'CLEVR_test_000000.png')
        self.assertEqual(sample['question_type_ids'], -1)
        self.assertEqual(sample['question_type_names'], '<UNK>')
        self.assertEqual(sample['questions'], 'Is there anything else that is the same shape as the small brown matte object?')
        self.assertEqual(sample['answers'], '<UNK>')
        

Example #12
    def test_disable_component(self):
        """ Tests whether skipping (disable) works properly. """
        # Set param registry.
        ConfigRegistry()._clear_registry()
        config = ConfigInterface()
        config.add_default_params({
            'disable': 'bow_encoder',
            'bow_encoder': {
                'type': 'BOWEncoder',
                'priority': 1
            }
        })
        # Build object.
        pipe = PipelineManager('testpm', config)
        pipe.build(False)

        # Assert no components were created.
        self.assertEqual(len(pipe), 0)
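
Combining this test with Example #1 suggests that disabling is per-component: with two components registered and one of them named in 'disable', only the other should be built. The sketch below is an inference from those two tests, not a verified snippet; names and priorities are illustrative.

ConfigRegistry()._clear_registry()
config = ConfigInterface()
config.add_default_params({
    'disable': 'bow_encoder1',
    'bow_encoder1': {'type': 'BOWEncoder', 'priority': 0.1},
    'bow_encoder2': {'type': 'BOWEncoder', 'priority': 2.1}
})
pipe = PipelineManager('testpm', config)
pipe.build(False)
# Expected (inferred, not verified): only 'bow_encoder2' is built.
# assert len(pipe) == 1 and pipe[0].name == 'bow_encoder2'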
Example #13
    def test_validation_set(self):
        """
            Tests the validation split.

            .. note::
                Test on real data is performed only if the json file '~/data/CLEVR_v1.0/questions/CLEVR_val_questions.json' is found.
        """
        # Empty config.
        config = ConfigInterface()
        config.add_config_params({"clevr_validation": {"split": "validation", "globals": {"image_height": "clevr_image_height", "image_width": "clevr_image_width"}}})

        # Check the existence of the validation set.
        if False: #check_file_existence(path.expanduser('~/data/CLEVR_v1.0/questions'),'CLEVR_val_questions.json'):

            # Create object.
            clevr = CLEVR("clevr_validation", config["clevr_validation"])
            
            # Check dataset size.
            self.assertEqual(len(clevr), 149991)

            # Get sample.
            sample = clevr[0]

        else: 
            dataset_content = [{'image_index': 0, 'program': [{'inputs': [], 'function': 'scene', 'value_inputs': []}, {'inputs': [0], 'function': 'filter_size', 'value_inputs': ['large']}, 
                {'inputs': [1], 'function': 'filter_material', 'value_inputs': ['metal']}, {'inputs': [2], 'function': 'unique', 'value_inputs': []}, 
                {'inputs': [3], 'function': 'same_shape', 'value_inputs': []}, {'inputs': [4], 'function': 'exist', 'value_inputs': []}], 
                'question_index': 0, 'image_filename': 'CLEVR_val_000000.png', 'question_family_index': 39, 'split': 'val', 'answer': 'no', 'question': 'Are there any other things that are the same shape as the big metallic object?'}]

            # Mock up the load_dataset method.
            with patch( "ptp.components.tasks.image_text_to_class.clevr.CLEVR.load_dataset", MagicMock( side_effect = [ dataset_content ] )):
                clevr = CLEVR("clevr_validation", config["clevr_validation"])

            # Mock up the get_image method.
            with patch( "ptp.components.tasks.image_text_to_class.clevr.CLEVR.get_image", MagicMock( side_effect = [ "0" ] )):
                sample = clevr[0]

        # Check sample.
        self.assertEqual(sample['indices'], 0)
        self.assertEqual(sample['image_ids'], 'CLEVR_val_000000.png')
        self.assertEqual(sample['question_type_ids'], 10)
        self.assertEqual(sample['question_type_names'], 'exist')
        self.assertEqual(sample['questions'], 'Are there any other things that are the same shape as the big metallic object?')
        self.assertEqual(sample['answers'], 'no')
Example #14
    def __init__(self, *args, **kwargs):
        super(TestComponent, self).__init__(*args, **kwargs)

        # Overwrite abc abstract methods.
        MockupComponent.__abstractmethods__ = set()
        MockupTask.__abstractmethods__ = set()

        # Create mocked-up component.
        config = ConfigInterface()
        self.task = MockupTask("test_task", config)
        self.component = MockupComponent("test_component", config)
Example #15
    def test_create_subset_random_sampler_file(self):
        """ Tests whether SubsetRandomSampler accepts 'indices' option 4: the name of a file containing indices. """

        filename = "/tmp/tmp_indices.txt"
        # Store indices to file.
        indices = np.asarray([1,2,3,4,5],dtype=int)
        # Write array to file, separate elements with commas.
        indices.tofile(filename, sep=',', format="%s")

        config = ConfigInterface()
        config.add_default_params({'name': 'SubsetRandomSampler',
                                'indices': filename})
        # Create the sampler.
        sampler = SamplerFactory.build(TestProblemMockup(), config)

        # Check number of samples.
        self.assertEqual(len(sampler), 5)

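Taken together, the SubsetRandomSampler tests in this listing exercise four accepted forms of the 'indices' parameter. A minimal side-by-side sketch of the corresponding config entries; the values are illustrative, and the key is 'name' in some snippets and 'type' in others, apparently depending on the library version:

# Option 1: a Python range object.
{'name': 'SubsetRandomSampler', 'indices': range(20)}
# Option 2: a range encoded as a string 'first, last'.
{'name': 'SubsetRandomSampler', 'indices': '0, 20'}
# Option 3: an explicit list of indices.
{'name': 'SubsetRandomSampler', 'indices': [0, 2, 5, 10]}
# Option 4: the name of a file with comma-separated indices.
{'name': 'SubsetRandomSampler', 'indices': '/tmp/tmp_indices.txt'}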
Example #16
    def test_test_dev_split(self):
        """
            Tests the test_dev split.

            .. note::
                Test on real data is performed only if an adequate json source file is found.
        """
        # Empty config.
        config = ConfigInterface()
        config.add_config_params({"gqa_testdev": {"split": "test_dev", "globals": {"image_height": "gqa_image_height", "image_width": "gqa_image_width"}}})
    
        # Check the existence of the test_dev set.
        if False: #check_file_existence(path.expanduser('~/data/gqa/questions1.2'),'testdev_all_questions.json'):

            # Create object.
            task = GQA("gqa_testdev", config["gqa_testdev"])
            
            # Check dataset size.
            self.assertEqual(len(task), 172174)

            # Get sample.
            sample = task[0]

        else: 
            processed_dataset_content = [ {'sample_ids': '20968379', 'image_ids': 'n288870', 'questions': 'Do the shorts have dark color?', 'answers': 'yes', 'full_answers': 'Yes, the shorts are dark.'} ]

            # Mock up the load_dataset method.
            with patch( "ptp.components.tasks.image_text_to_class.gqa.GQA.load_dataset", MagicMock( side_effect = [ processed_dataset_content ] )):
                task = GQA("gqa_testdev", config["gqa_testdev"])

            # Mock up the get_image method.
            with patch( "ptp.components.tasks.image_text_to_class.gqa.GQA.get_image", MagicMock( side_effect = [ "0" ] )):
                sample = task[0]

        # Check sample.
        self.assertEqual(sample['indices'], 0)
        self.assertEqual(sample['sample_ids'], '20968379')
        self.assertEqual(sample['image_ids'], 'n288870')
        self.assertEqual(sample['questions'], 'Do the shorts have dark color?')
        self.assertEqual(sample['answers'], 'yes')
        self.assertEqual(sample['full_answers'], 'Yes, the shorts are dark.')
Example #17
    def test_training_0_split(self):
        """
            Tests the training_0 split.

            .. note::
                Test on real data is performed only if an adequate json source file is found.
        """
        # Empty config.
        config = ConfigInterface()
        config.add_config_params({"gqa_training_0": {"split": "training_0", "globals": {"image_height": "gqa_image_height", "image_width": "gqa_image_width"}}})
    
        # Check the existence of the training_0 set.
        if False: #check_file_existence(path.expanduser('~/data/gqa/questions1.2/train_all_questions'),'train_all_questions_0.json'):

            # Create object.
            task = GQA("gqa_training_0", config["gqa_training_0"])
            
            # Check dataset size.
            self.assertEqual(len(task), 1430536)

            # Get sample.
            sample = task[0]

        else: 
            processed_dataset_content = [ {'sample_ids': '07333408', 'image_ids': '2375429', 'questions': 'What is on the white wall?', 'answers': 'pipe', 'full_answers': 'The pipe is on the wall.'} ]

            # Mock up the load_dataset method.
            with patch( "ptp.components.tasks.image_text_to_class.gqa.GQA.load_dataset", MagicMock( side_effect = [ processed_dataset_content ] )):
                task = GQA("gqa_training_0", config["gqa_training_0"])

            # Mock up the get_image method.
            with patch( "ptp.components.tasks.image_text_to_class.gqa.GQA.get_image", MagicMock( side_effect = [ "0" ] )):
                sample = task[0]

        # Check sample.
        self.assertEqual(sample['indices'], 0)
        self.assertEqual(sample['sample_ids'], '07333408')
        self.assertEqual(sample['image_ids'], '2375429')
        self.assertEqual(sample['questions'], 'What is on the white wall?')
        self.assertEqual(sample['answers'], 'pipe')
        self.assertEqual(sample['full_answers'], 'The pipe is on the wall.')
Example #18
    def test_validation_split(self):
        """
            Tests the validation split.

            .. note::
                Test on real data is performed only if an adequate json source file is found.
        """
        # Empty config.
        config = ConfigInterface()
        config.add_config_params({"gqa_validation": {"split": "validation", "globals": {"image_height": "gqa_image_height", "image_width": "gqa_image_width"}}})
    
        # Check the existence of the validation set.
        if False: #check_file_existence(path.expanduser('~/data/gqa/questions1.2'),'val_all_questions.json'):

            # Create object.
            task = GQA("gqa_validation", config["gqa_validation"])
            
            # Check dataset size.
            self.assertEqual(len(task), 2011853)

            # Get sample.
            sample = task[0]

        else: 
            processed_dataset_content = [ {'sample_ids': '05451384', 'image_ids': '2382986', 'questions': 'Are there blankets under the brown cat?', 'answers': 'no', 'full_answers': 'No, there is a towel under the cat.'} ]

            # Mock up the load_dataset method.
            with patch( "ptp.components.tasks.image_text_to_class.gqa.GQA.load_dataset", MagicMock( side_effect = [ processed_dataset_content ] )):
                task = GQA("gqa_validation", config["gqa_validation"])

            # Mock up the get_image method.
            with patch( "ptp.components.tasks.image_text_to_class.gqa.GQA.get_image", MagicMock( side_effect = [ "0" ] )):
                sample = task[0]

        # Check sample.
        self.assertEqual(sample['indices'], 0)
        self.assertEqual(sample['sample_ids'], '05451384')
        self.assertEqual(sample['image_ids'], '2382986')
        self.assertEqual(sample['questions'], 'Are there blankets under the brown cat?')
        self.assertEqual(sample['answers'], 'no')
        self.assertEqual(sample['full_answers'], 'No, there is a towel under the cat.')
Example #19
    def test_overwrite_params(self):
        config = ConfigInterface()
        config.add_config_params({'under': True})
        config.add_default_params({'under': False})
        self.assertEqual(config['under'], True)

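The test above pins down the precedence rule used throughout these snippets: values added with add_config_params() take priority over values added with add_default_params(), even when the default is added later. A minimal sketch under that assumption (the 'batch_size' key is purely illustrative):

config = ConfigInterface()
config.add_default_params({'batch_size': 64})     # default value
config.add_config_params({'batch_size': 128})     # loaded / user-supplied value
assert config['batch_size'] == 128                # config params take precedence over defaults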
Example #20
    def test_config_params(self):
        config = ConfigInterface()
        # Add params.
        config.add_config_params({'config_0': {'config_1': 'int'}})
        self.assertNotEqual(config['config_0'], None)
        self.assertEqual(config['config_0']['config_1'], 'int')

        # Remove params.
        config.del_config_params(['config_0', 'config_1'])
        with self.assertRaises(KeyError):
            _ = config['config_0']['config_1']
Example #21
class Worker(object):
    """
    Base abstract class for the workers.
    All base workers should subclass it and override the relevant methods.
    """
    def __init__(self, name, add_default_parser_args=True):
        """
        Base constructor for all workers:

            - Initializes the AppState singleton:

                >>> self.app_state = AppState()

            - Initializes the Configuration Registry:

                >>> self.config = ConfigInterface()

            - Creates parser and adds default worker command line arguments.

        :param name: Name of the worker.
        :type name: str

        :param add_default_parser_args: If set, adds default parser arguments (DEFAULT: True).
        :type add_default_parser_args: bool

        """
        # Call base constructor.
        super(Worker, self).__init__()

        # Set worker name.
        self.name = name

        # Initialize the application state singleton.
        self.app_state = AppState()

        # Initialize parameter interface/registry.
        self.config = ConfigInterface()

        # Create parser with a list of runtime arguments.
        self.parser = argparse.ArgumentParser(
            formatter_class=argparse.RawTextHelpFormatter)

        # Add arguments to the specific parser.
        if add_default_parser_args:
            # These arguments will be shared by all basic workers.
            self.parser.add_argument(
                '--config',
                dest='config',
                type=str,
                default='',
                help='Name of the configuration file(s) to be loaded. '
                'If specifying more than one file, they must be separated with a comma ",".'
            )

            self.parser.add_argument(
                '--disable',
                type=str,
                default='',
                dest='disable',
                help=
                'Comma-separated list of components to be disabled (DEFAULT: empty)'
            )

            self.parser.add_argument(
                '--load',
                type=str,
                default='',
                dest='load_checkpoint',
                help=
                'Path and name of the checkpoint file containing the saved parameters'
                ' of the pipeline models to load (should end with a .pt extension)'
            )

            self.parser.add_argument(
                '--gpu',
                dest='use_gpu',
                action='store_true',
                help=
                'The current worker will move the computations to GPU devices, if available '
                'in the system. (DEFAULT: False)')

            self.parser.add_argument(
                '--expdir',
                dest='expdir',
                type=str,
                default="~/experiments",
                help=
                'Path to the directory where the experiment(s) folders are/will be stored.'
                ' (DEFAULT: ~/experiments)')

            self.parser.add_argument('--savetag',
                                     dest='savetag',
                                     type=str,
                                     default='',
                                     help='Tag for the save directory.')

            self.parser.add_argument('--logger',
                                     action='store',
                                     dest='log_level',
                                     type=str,
                                     default='INFO',
                                     choices=[
                                         'CRITICAL', 'ERROR', 'WARNING',
                                         'INFO', 'DEBUG', 'NOTSET'
                                     ],
                                     help="Log level. (DEFAULT: INFO)")

            self.parser.add_argument(
                '--interval',
                dest='logging_interval',
                default=100,
                type=int,
                help=
                'Statistics logging interval. Will impact logging to the logger and '
                'exporting to TensorBoard. Writing to the csv file is not impacted '
                '(exports at every step). (DEFAULT: 100, i.e. logs every 100 episodes).'
            )

            self.parser.add_argument(
                '--agree',
                dest='confirm',
                action='store_true',
                help=
                'Request user confirmation just after loading the settings, '
                'before starting the experiment. (DEFAULT: False)')

    def setup_experiment(self):
        """
        Sets up a specific experiment.

        Base method:

            - Parses command line arguments.

            - Initializes logger with worker name.

            - Sets the three default config sections (training / validation / testing) and their dataloader params.

        .. note::

            Child classes should override this method, but still call its parent to draw the basic functionality \
            implemented here.


        """
        # Parse arguments.
        self.app_state.args, self.unparsed = self.parser.parse_known_args()

        # Initialize logger using the configuration.
        # For now do not add file handler, as path to logfile is not known yet.
        self.logger = logging.initialize_logger(self.name, False)

        # add empty sections
        self.config.add_default_params(
            {"training": {
                'terminal_conditions': {}
            }})
        self.config.add_default_params({"validation": {}})
        self.config.add_default_params({"testing": {}})

    def add_statistics(self, stat_col):
        """
        Adds most elementary shared statistics to ``StatisticsCollector``: episode.

        :param stat_col: ``StatisticsCollector``.

        """
        # Add default statistics with formatting.
        stat_col.add_statistics('episode', '{:06d}')

    def add_aggregators(self, stat_agg):
        """
        Adds basic statistical aggregators to ``StatisticsAggregator``: episode and \
        episodes_aggregated.

        :param stat_agg: ``StatisticsAggregator``.

        """
        # add 'aggregators' for the episode.
        #stat_agg.add_aggregator('epoch', '{:02d}')
        stat_agg.add_aggregator('episode', '{:06d}')
        # Number of aggregated episodes.
        stat_agg.add_aggregator('episodes_aggregated', '{:06d}')

    @abstractmethod
    def run_experiment(self):
        """
        Main function of the worker which executes a specific experiment.

        .. note::

            Abstract. Should be implemented in the subclasses.
        """

    def collect_all_statistics(self, problem_mgr, pipeline_mgr, data_dict,
                               stat_col):
        """
        Collects all statistics from the problem and the pipeline.

        :param pipeline_mgr: Pipeline manager containing the list of components.
        :type pipeline_mgr: ``PipelineManager``

        :param problem_mgr: Problem manager.

        :param data_dict: contains the batch of samples to pass through the pipeline.
        :type data_dict: ``DataDict``

        :param stat_col: statistics collector used for logging accuracy etc.
        :type stat_col: ``StatisticsCollector``

        """
        # Collect "local" statistics.
        stat_col['episode'] = self.app_state.episode
        if ('epoch' in stat_col) and (self.app_state.epoch is not None):
            stat_col['epoch'] = self.app_state.epoch

        # Collect rest of statistics.
        problem_mgr.problem.collect_statistics(stat_col, data_dict)
        pipeline_mgr.collect_statistics(stat_col, data_dict)

    def aggregate_all_statistics(self, problem_mgr, pipeline_mgr, stat_col,
                                 stat_agg):
        """
        Aggregates the collected statistics. Exports the aggregations to logger, csv and TB. \
        Empties statistics collector for the next episode.

        :param pipeline_mgr: Pipeline manager containing the list of components.
        :type pipeline_mgr: ``PipelineManager``

        :param problem_mgr: Problem manager.

        :param stat_col: ``StatisticsCollector`` object.

        :param stat_agg: ``StatisticsAggregator`` object.
        """
        # Aggregate "local" statistics.
        if ('epoch' in stat_col) and ('epoch'
                                      in stat_agg) and (self.app_state.epoch
                                                        is not None):
            stat_agg.aggregators['epoch'] = self.app_state.epoch
        stat_agg.aggregators['episode'] = self.app_state.episode
        stat_agg.aggregators['episodes_aggregated'] = len(stat_col['episode'])
        # Aggregate rest of statistics.
        problem_mgr.problem.aggregate_statistics(stat_col, stat_agg)
        pipeline_mgr.aggregate_statistics(stat_col, stat_agg)

    def export_all_statistics(self, stat_obj, tag='', export_to_log=True):
        """
        Export the statistics/aggregations to logger, csv and TB.

        :param stat_obj: ``StatisticsCollector`` or ``StatisticsAggregator`` object.

        :param tag: Additional tag that will be added to string exported to logger, optional (DEFAULT = '').
        :type tag: str

        :param export_to_log: If True, exports statistics to logger (DEFAULT: True)
        :type export_to_log: bool

        """
        # Log to logger
        if export_to_log:
            self.logger.info(stat_obj.export_to_string(tag))

        # Export to csv
        stat_obj.export_to_csv()

        # Export to TensorBoard.
        stat_obj.export_to_tensorboard()

    def set_random_seeds(self, section_name, config):
        """
        Sets the ``torch`` & ``NumPy`` random seeds from the ``ParamRegistry``: \
        if a seed was indicated, use it; otherwise set a random one.

        :param section_name: Name of the section (for logging purposes only).
        :type section_name: str

        :param config: Section of the config registry that will be changed \
            (only the "training" or "testing" sections will be taken into account).

        """
        # Set the random seeds: either from the loaded configuration or a default randomly selected one.
        config.add_default_params({"seed_numpy": -1})
        if config["seed_numpy"] == -1:
            seed = randrange(0, 2**32)
            # Overwrite the config param!
            config.add_config_params({"seed_numpy": seed})

        self.logger.info("Setting numpy random seed in {} to: {}".format(
            section_name, config["seed_numpy"]))
        np.random.seed(config["seed_numpy"])

        config.add_default_params({"seed_torch": -1})
        if config["seed_torch"] == -1:
            seed = randrange(0, 2**32)
            # Overwrite the config param!
            config.add_config_params({"seed_torch": seed})

        self.logger.info("Setting torch random seed in {} to: {}".format(
            section_name, config["seed_torch"]))
        torch.manual_seed(config["seed_torch"])
        torch.cuda.manual_seed_all(config["seed_torch"])
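
A minimal sketch of how the Worker base class above is meant to be used: subclass it, implement run_experiment(), then call setup_experiment() before running. DummyWorker and its body are illustrative only, not part of ptp:

class DummyWorker(Worker):
    """ Illustrative worker that only logs a single message. """

    def run_experiment(self):
        # self.logger is created in setup_experiment().
        self.logger.info("Running experiment '{}'".format(self.name))

# Typical usage (command-line arguments are parsed inside setup_experiment()):
worker = DummyWorker("dummy")
worker.setup_experiment()
worker.run_experiment()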
Example #23
    def __init__(self):
        Component.__init__(self, "MockupComponent", None, ConfigInterface())
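
Several of the mock-up examples above (e.g. #8, #9 and #14) rely on the same trick for instantiating abstract classes inside unit tests: clearing __abstractmethods__ before construction. A self-contained sketch of that pattern; MyBase and MyMockup are illustrative names, not ptp classes:

from abc import ABC, abstractmethod

class MyBase(ABC):
    @abstractmethod
    def forward(self, data):
        pass

class MyMockup(MyBase):
    pass

# Normally MyMockup() raises TypeError because forward() is still abstract;
# clearing the set of abstract methods bypasses that check.
MyMockup.__abstractmethods__ = set()
mockup = MyMockup()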