Пример #1
0
def distributed_zero_full_precision_lamb_bart(world_rank, world_size, device,
                                              checkpoint_dir):
    """Build distributed trainer options and delegate to the BART checkpoint helper.

    The options carry the device id, the rank/world-size pair, post-accumulation
    all-reduce, ``deepspeed_zero_optimization`` at stage 1 and deterministic
    compute. They are forwarded, together with ``checkpoint_dir`` and a
    per-rank state-dict key, to ``create_orttrainer_and_save_checkpoint_bart``.

    Args:
        world_rank: Rank of this process; also used as the suffix of the
            state-dict key (``'state_dict_<rank>'``).
        world_size: Total number of participating processes.
        device: Device identifier, stored under ``opts['device']['id']`` and
            also passed positionally to the helper.
        checkpoint_dir: Passed through unchanged to the helper.

    NOTE(review): no optimizer is selected in this function even though the
    name mentions "lamb" -- presumably the helper picks it; confirm there.
    """
    # Assemble the nested sections in the same key order as the flat literal.
    opts = {}
    opts['device'] = {'id': device}

    distributed = {'world_rank': world_rank, 'world_size': world_size}
    distributed['allreduce_post_accumulation'] = True
    distributed['deepspeed_zero_optimization'] = {'stage': 1}
    opts['distributed'] = distributed

    opts['debug'] = {'deterministic_compute': True}

    # One key per rank so each process's state can be told apart.
    rank_key = f'state_dict_{world_rank!s}'
    create_orttrainer_and_save_checkpoint_bart(
        device, opts, checkpoint_dir, state_dict_key_name=rank_key)
def distributed_zero_full_precision_lamb_bart(world_rank, world_size, device,
                                              checkpoint_dir):
    """Forward ZeRO stage-1 distributed options to the BART checkpoint helper.

    Creates an options mapping with three sections -- ``"device"``,
    ``"distributed"`` and ``"debug"`` -- and calls
    ``create_orttrainer_and_save_checkpoint_bart`` with it. No
    ``"mixed_precision"`` section is set here.

    Args:
        world_rank: Rank of the calling process; appended to
            ``"state_dict_"`` to form the state-dict key name.
        world_size: Number of processes in the distributed run.
        device: Device identifier placed in the options and passed to the
            helper as its first argument.
        checkpoint_dir: Handed to the helper as-is.

    NOTE(review): what the helper does with ``checkpoint_dir`` is not visible
    in this block -- presumably it writes the checkpoint there; verify.
    """
    device_section = {"id": device}
    zero_section = {"stage": 1}
    distributed_section = {
        "world_rank": world_rank,
        "world_size": world_size,
        "allreduce_post_accumulation": True,
        "deepspeed_zero_optimization": zero_section,
    }
    debug_section = {"deterministic_compute": True}

    opts = {
        "device": device_section,
        "distributed": distributed_section,
        "debug": debug_section,
    }

    # Rank-specific key name, e.g. "state_dict_0" for rank 0.
    key_name = "state_dict_{!s}".format(world_rank)
    create_orttrainer_and_save_checkpoint_bart(
        device, opts, checkpoint_dir, state_dict_key_name=key_name)
Пример #3
0
def distributed_megatron_mixed_precision_lamb(world_rank, world_size, device,
                                              checkpoint_dir):
    """Build mixed-precision, horizontally parallel options and delegate.

    The options enable ``mixed_precision``, set ``horizontal_parallel_size``
    to ``world_size``, turn on post-accumulation all-reduce and deterministic
    compute, and are then passed to
    ``create_orttrainer_and_save_checkpoint_bart`` with a per-rank
    state-dict key.

    Args:
        world_rank: Rank of this process; suffix of the state-dict key
            (``'state_dict_<rank>'``).
        world_size: Total number of processes; also used as the horizontal
            parallel size.
        device: Device identifier, stored in the options and passed
            positionally to the helper.
        checkpoint_dir: Passed through unchanged to the helper.

    NOTE(review): this calls the ``_bart`` helper although the function name
    does not mention BART -- looks intentional, but confirm.
    """
    # Sections are inserted in the same order as the original literal.
    opts = {}
    opts['device'] = {'id': device}
    opts['mixed_precision'] = {'enabled': True}

    distributed = {'world_rank': world_rank, 'world_size': world_size}
    distributed['allreduce_post_accumulation'] = True
    # Every process takes part in the horizontal split.
    distributed['horizontal_parallel_size'] = world_size
    opts['distributed'] = distributed

    opts['debug'] = {'deterministic_compute': True}

    rank_key = f'state_dict_{world_rank!s}'
    create_orttrainer_and_save_checkpoint_bart(
        device, opts, checkpoint_dir, state_dict_key_name=rank_key)
def distributed_megatron_mixed_precision_lamb(world_rank, world_size, device,
                                              checkpoint_dir):
    """Forward mixed-precision distributed options to the BART checkpoint helper.

    The options mapping has four sections: ``"device"``,
    ``"mixed_precision"`` (enabled), ``"distributed"`` (with
    ``"horizontal_parallel_size"`` equal to ``world_size``) and ``"debug"``
    (deterministic compute on).

    Args:
        world_rank: Rank of the calling process; appended to
            ``"state_dict_"`` to form the state-dict key name.
        world_size: Number of processes; reused as the horizontal parallel
            size.
        device: Device identifier placed in the options and passed to the
            helper as its first argument.
        checkpoint_dir: Handed to the helper as-is.

    NOTE(review): no optimizer is configured here despite "lamb" in the
    name -- presumably the helper chooses it; verify.
    """
    device_section = {"id": device}
    precision_section = {"enabled": True}
    distributed_section = {
        "world_rank": world_rank,
        "world_size": world_size,
        "allreduce_post_accumulation": True,
        "horizontal_parallel_size": world_size,
    }
    debug_section = {"deterministic_compute": True}

    opts = {
        "device": device_section,
        "mixed_precision": precision_section,
        "distributed": distributed_section,
        "debug": debug_section,
    }

    # Rank-specific key name, e.g. "state_dict_0" for rank 0.
    key_name = "state_dict_{!s}".format(world_rank)
    create_orttrainer_and_save_checkpoint_bart(
        device, opts, checkpoint_dir, state_dict_key_name=key_name)
Пример #5
0
def single_node_full_precision_bart(checkpoint_dir, device='cuda'):
    """Call the BART checkpoint helper with minimal single-node options.

    Only the device id and deterministic compute are set; no
    ``'distributed'`` or ``'mixed_precision'`` section is added.

    Args:
        checkpoint_dir: Passed through unchanged to
            ``create_orttrainer_and_save_checkpoint_bart``.
        device: Device identifier; defaults to ``'cuda'``.
    """
    opts = {}
    opts['device'] = {'id': device}
    opts['debug'] = {'deterministic_compute': True}
    create_orttrainer_and_save_checkpoint_bart(device, opts, checkpoint_dir)
def single_node_full_precision_bart(checkpoint_dir, device="cuda"):
    """Forward device-only, deterministic options to the BART checkpoint helper.

    Args:
        checkpoint_dir: Handed to
            ``create_orttrainer_and_save_checkpoint_bart`` as-is.
        device: Device identifier stored under ``opts["device"]["id"]`` and
            passed as the helper's first argument; defaults to ``"cuda"``.
    """
    device_section = {"id": device}
    debug_section = {"deterministic_compute": True}
    opts = {
        "device": device_section,
        "debug": debug_section,
    }
    create_orttrainer_and_save_checkpoint_bart(device, opts, checkpoint_dir)