示例#1
0
def test_deepspeed_with_meta_device(tmpdir):
    """Check a model built under ``init_meta_context`` ends up on CPU after fitting.

    The model is created inside the meta context, so its layer weight starts
    on the ``meta`` device. It is then fitted with a DeepSpeed stage-3
    strategy, after which the same weight is expected to report ``cpu``.
    """
    with init_meta_context():
        boring = BoringModel()
    assert boring.layer.weight.device.type == "meta"

    trainer_kwargs = dict(
        default_root_dir=tmpdir,
        strategy=DeepSpeedStrategy(stage=3),
        gpus=2,
        fast_dev_run=True,
        precision=16,
    )
    Trainer(**trainer_kwargs).fit(boring)

    assert boring.layer.weight.device.type == "cpu"
示例#2
0
def test_materialize_module_recursive_child():
    """Ensure ``materialize_module`` does not nest a child inside itself.

    A model is created under ``init_meta_context`` and then materialized.
    Afterwards, reading ``layer`` on the model's own ``layer`` must raise an
    ``AttributeError`` whose message names a ``Linear`` object.
    """
    with init_meta_context():
        boring = BoringModel()

    materialize_module(boring)

    expected_message = "'Linear' object has no attribute 'layer'"
    with pytest.raises(AttributeError, match=expected_message):
        # The lookup itself is the assertion; the value is never used.
        _ = boring.layer.layer
示例#3
0
def test_init_meta_context():
    """Exercise ``init_meta_context``, ``materialize_module`` and ``is_on_meta_device``.

    Inside the context, freshly built modules report the ``meta`` device and
    materializing them moves their weights to ``cpu``. Outside the context,
    modules are created on ``cpu`` directly, and re-entering the context
    yields ``meta`` weights again.
    """
    with init_meta_context():
        # A plain Linear keeps its class but lives on the meta device.
        linear = nn.Linear(in_features=1, out_features=1)
        assert isinstance(linear, nn.Linear)
        assert linear.weight.device.type == "meta"
        assert is_on_meta_device(linear)

        # Materializing inside the context: use the returned module.
        net = MLP(4)
        assert net.layer[0].weight.device.type == "meta"

        net = materialize_module(net)
        assert net.layer[0].weight.device.type == "cpu"

        assert not is_on_meta_device(net)
        assert not is_on_meta_device(nn.Module())

        # Materializing inside the context: return value ignored.
        simple = SimpleBoringModel(4)
        assert simple.layer[0].weight.device.type == "meta"
        materialize_module(simple)
        assert simple.layer[0].weight.device.type == "cpu"

    # Outside the context, modules are created on cpu.
    net = MLP(4)
    assert net.layer[0].weight.device.type == "cpu"
    # no-op as already materialized.
    materialize_module(net)
    assert net.layer[0].weight.device.type == "cpu"

    linear = nn.Linear(in_features=1, out_features=1)
    assert linear.weight.device.type == "cpu"

    # Entering the context a second time gives meta weights again ...
    with init_meta_context():
        linear = nn.Linear(in_features=1, out_features=1)
        assert linear.weight.device.type == "meta"

    # ... and leaving it gives cpu weights again.
    linear = nn.Linear(in_features=1, out_features=1)
    assert linear.weight.device.type == "cpu"
示例#4
0
文件: train.py 项目: SeanNaren/minGPT
    args = parser.parse_args()

    if not os.path.exists("input.txt"):
        os.system(
            "wget https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
        )

    text = open('input.txt',
                'r').read()  # don't worry we won't run out of file handles
    train_dataset = CharDataset(
        text, args.block_size)  # one line of poem is roughly 50 characters
    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              num_workers=args.num_workers)

    with init_meta_context():
        model = GPT(vocab_size=train_dataset.vocab_size,
                    block_size=train_dataset.block_size,
                    n_layer=args.n_layer,
                    n_head=args.n_head,
                    n_embd=args.n_embd,
                    learning_rate=args.learning_rate)

    lr_decay = LearningRateDecayCallback(learning_rate=6e-4,
                                         warmup_tokens=512 * 20,
                                         final_tokens=2 * len(train_dataset) *
                                         args.block_size)

    trainer = Trainer.from_argparse_args(
        args,
        max_epochs=10,