示例#1
0
def _configure_using_config_file(deepspeed_config, mpu=None):
    """Populate this module's activation-checkpointing globals from a DeepSpeed config.

    Args:
        deepspeed_config: Configuration handed straight to ``DeepSpeedConfig``
            (presumably a config file path or dict — confirm with callers).
        mpu: Optional model-parallel unit object forwarded to ``DeepSpeedConfig``.
    """
    # Rebind the module-level checkpointing knobs so subsequent checkpoint
    # calls in this module pick up the configured values.
    global num_layers, PARTITION_ACTIVATIONS, CONTIGUOUS_CHECKPOINTING, \
            PA_TO_CPU, SYNCHRONIZE, PROFILE_TIME

    # Only the activation-checkpointing section of the full config is used here.
    config = DeepSpeedConfig(deepspeed_config, mpu=mpu).activation_checkpointing_config
    logger.info(config.repr())
    PARTITION_ACTIVATIONS = config.partition_activations
    CONTIGUOUS_CHECKPOINTING = config.contiguous_memory_optimization
    num_layers = config.number_checkpoints
    PA_TO_CPU = config.cpu_checkpointing
    SYNCHRONIZE = config.synchronize_checkpoint_boundary
    PROFILE_TIME = config.profile
示例#2
0
 def test_empty_csv_monitor(self):
     """An empty ``csv_monitor`` section yields the documented defaults."""
     ds_config = DeepSpeedConfig({"train_batch_size": 2, "csv_monitor": {}})
     monitor = csvMonitor(ds_config.monitor_config)
     defaults = {
         "enabled": CSV_MONITOR_ENABLED_DEFAULT,
         "output_path": CSV_MONITOR_OUTPUT_PATH_DEFAULT,
         "job_name": CSV_MONITOR_JOB_NAME_DEFAULT,
     }
     for attr, expected in defaults.items():
         assert getattr(monitor, attr) == expected
示例#3
0
 def test_empty_tensorboard(self):
     """An empty ``tensorboard`` section yields the documented defaults."""
     ds_config = DeepSpeedConfig({"train_batch_size": 2, "tensorboard": {}})
     monitor = TensorBoardMonitor(ds_config.monitor_config)
     defaults = {
         "enabled": TENSORBOARD_ENABLED_DEFAULT,
         "output_path": TENSORBOARD_OUTPUT_PATH_DEFAULT,
         "job_name": TENSORBOARD_JOB_NAME_DEFAULT,
     }
     for attr, expected in defaults.items():
         assert getattr(monitor, attr) == expected
示例#4
0
 def test_empty_wandb(self):
     """An empty ``wandb`` section yields the documented defaults."""
     ds_config = DeepSpeedConfig({"train_batch_size": 2, "wandb": {}})
     monitor = WandbMonitor(ds_config.monitor_config)
     defaults = {
         "enabled": WANDB_ENABLED_DEFAULT,
         "group": WANDB_GROUP_NAME_DEFAULT,
         "team": WANDB_TEAM_NAME_DEFAULT,
         "project": WANDB_PROJECT_NAME_DEFAULT,
     }
     for attr, expected in defaults.items():
         assert getattr(monitor, attr) == expected
示例#5
0
    def _test_batch_config(num_ranks, batch, micro_batch, gas, success):
        """Exercise batch-size configuration across parameter subsets.

        Runs ``_run_batch_config`` with full and partial combinations of
        ``train_batch``, ``micro_batch`` and ``gas`` and checks each outcome
        through ``_batch_assert``.

        Args:
            num_ranks: World size the test assumes; asserted against dist.
            batch: Global train batch size under test.
            micro_batch: Per-rank micro batch size under test.
            gas: Gradient accumulation steps under test.
            success: Whether the parameter combination is expected to validate.
        """
        # BUGFIX: the message was a plain string containing a literal
        # "f{num_ranks}" — the f prefix belongs before the quote so the
        # world size is actually interpolated.
        assert dist.get_world_size() == num_ranks, \
            f'The test assumes a world size of {num_ranks}'

        ds_batch_config = 'tests/unit/ds_batch_config.json'
        ds_config = DeepSpeedConfig(ds_batch_config)

        # Test case when all three parameters are provided.
        status = _run_batch_config(ds_config,
                                   train_batch=batch,
                                   micro_batch=micro_batch,
                                   gas=gas)
        _batch_assert(status, ds_config, batch, micro_batch, gas, success)

        # Test cases when two out of three parameters are provided.
        status = _run_batch_config(ds_config,
                                   train_batch=batch,
                                   micro_batch=micro_batch)
        _batch_assert(status, ds_config, batch, micro_batch, gas, success)

        if success:
            # When gas is provided with one more parameter.
            status = _run_batch_config(ds_config, train_batch=batch, gas=gas)
            _batch_assert(status, ds_config, batch, micro_batch, gas, success)

            status = _run_batch_config(ds_config,
                                       micro_batch=micro_batch,
                                       gas=gas)
            _batch_assert(status, ds_config, batch, micro_batch, gas, success)

            # Test the case when only micro_batch or train_batch is provided;
            # only meaningful when gas == 1, otherwise the single value cannot
            # determine a consistent configuration.
            if gas == 1:
                status = _run_batch_config(ds_config, micro_batch=micro_batch)
                _batch_assert(status, ds_config, batch, micro_batch, gas,
                              success)

                status = _run_batch_config(ds_config, train_batch=batch)
                _batch_assert(status, ds_config, batch, micro_batch, gas,
                              success)
        else:
            # When only gas is provided.
            status = _run_batch_config(ds_config, gas=gas)
            _batch_assert(status, ds_config, batch, micro_batch, gas, success)

            # When gas is provided with something else and gas does not
            # divide batch.
            if gas != 1:
                status = _run_batch_config(ds_config,
                                           train_batch=batch,
                                           gas=gas)
                _batch_assert(status, ds_config, batch, micro_batch, gas,
                              success)
示例#6
0
 def test_csv_monitor(self):
     """Explicit ``csv_monitor`` settings propagate to ``csvMonitor``."""
     settings = {
         "enabled": True,
         "output_path": "test_output/ds_logs/",
         "job_name": "test",
     }
     ds_config = DeepSpeedConfig({"train_batch_size": 2, "csv_monitor": settings})
     monitor = csvMonitor(ds_config.monitor_config)
     for attr, expected in settings.items():
         assert getattr(monitor, attr) == expected
示例#7
0
 def test_tensorboard(self):
     """Explicit ``tensorboard`` settings propagate to ``TensorBoardMonitor``."""
     settings = {
         "enabled": True,
         "output_path": "test_output/ds_logs/",
         "job_name": "test",
     }
     ds_config = DeepSpeedConfig({"train_batch_size": 2, "tensorboard": settings})
     monitor = TensorBoardMonitor(ds_config.monitor_config)
     for attr, expected in settings.items():
         assert getattr(monitor, attr) == expected
示例#8
0
 def test_wandb(self):
     """Explicit ``wandb`` settings propagate to ``WandbMonitor``."""
     settings = {
         "enabled": False,
         "group": "my_group",
         "team": "my_team",
         "project": "my_project",
     }
     ds_config = DeepSpeedConfig({"train_batch_size": 2, "wandb": settings})
     monitor = WandbMonitor(ds_config.monitor_config)
     for attr, expected in settings.items():
         assert getattr(monitor, attr) == expected