Example #1
    # Test method from a NeMo test case. Assumes numpy (np), torch, nemo,
    # DataLayerNM, NeuralType, LabelsType, TokenClassifier, and
    # SmoothedCrossEntropyLoss are imported, and that self.nf is a NeuralModuleFactory.
    def test_untied_weights(self):
        class DummyDataLayer(DataLayerNM):
            def __init__(self, vocab_size):
                super().__init__()
                self.vocab_size = vocab_size

                class DummyDS(torch.utils.data.Dataset):
                    def __init__(self, vocab_size):
                        super().__init__()
                        self.vocab_size = vocab_size

                    def __getitem__(self, index):
                        # Random token ids serve as both model inputs and targets.
                        model_inputs = torch.randint(high=self.vocab_size, size=[10])
                        model_outputs = torch.randint(high=self.vocab_size, size=[10])
                        return (model_inputs, model_outputs)

                    def __len__(self):
                        return 10

                self._dataset = DummyDS(vocab_size)

            @property
            def output_ports(self):
                return {
                    "model_inputs": NeuralType(('B', 'T')),
                    "model_outputs": NeuralType(('B', 'T'), LabelsType()),
                }

            def __len__(self):
                return len(self._dataset)

            @property
            def dataset(self):
                return self._dataset

            def data_iterator(self):
                pass

        voc_size = 10
        dim = 10
        embd = nemo.backends.pytorch.common.other.SequenceEmbedding(voc_size=voc_size, hidden_size=dim)
        proj = TokenClassifier(hidden_size=dim, num_classes=voc_size)
        data = DummyDataLayer(voc_size)
        loss = SmoothedCrossEntropyLoss(pad_id=0)
        # Weight tying is deliberately disabled here; the commented call below
        # shows how embd and proj would share their weight matrices.
        # embd.tie_weights_with(
        #     proj,
        #     weight_names=["embedding.weight"],
        #     name2name_and_transform={"embedding.weight": ("mlp.layer2.weight", WeightShareTransform.SAME)},
        # )
        # Before training: the two matrices are initialized independently.
        self.assertFalse(
            np.array_equal(embd.embedding.weight.detach().cpu().numpy(), proj.mlp.layer2.weight.detach().cpu().numpy())
        )
        # Build the training graph: data -> embedding -> classifier -> loss.
        _in, _out = data()
        pred = embd(input_seq=_in)
        pred = proj(hidden_states=pred)
        loss_t = loss(labels=_out, logits=pred)

        # Train for a few steps so both weight matrices receive gradient updates.
        self.nf.train(
            [loss_t], optimizer="sgd", optimization_params={"max_steps": 5, "lr": 0.0003},
        )

        # After training the matrices must still differ, confirming they are untied.
        self.assertFalse(
            np.array_equal(embd.embedding.weight.detach().cpu().numpy(), proj.mlp.layer2.weight.detach().cpu().numpy())
        )
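
For contrast with the untied case above, here is a minimal plain-PyTorch sketch of what tying does (illustrative only; emb and proj are hypothetical stand-ins for the embedding and projection modules):

import torch.nn as nn

vocab_size, hidden = 10, 10
emb = nn.Embedding(vocab_size, hidden)
proj = nn.Linear(hidden, vocab_size, bias=False)

# Tie: both modules now hold the same Parameter object, so one gradient step
# updates both views at once. nn.Linear stores its weight as
# (out_features, in_features) = (vocab_size, hidden), which matches the
# embedding's shape, so no transpose is needed (the SAME transform above).
proj.weight = emb.weight
assert proj.weight.data_ptr() == emb.weight.data_ptr()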
Example #2
# Assumes nemo_nlp, TokenClassifier, SmoothedCrossEntropyLoss, and
# WeightShareTransform are imported, and args, tokenizer, and vocab_size are defined.
encoder = nemo_nlp.nm.trainables.TransformerEncoderNM(
    d_model=args.d_model,
    d_inner=args.d_inner,
    num_layers=args.num_layers,
    embedding_dropout=args.embedding_dropout,
    num_attn_heads=args.num_attn_heads,
    ffn_dropout=args.ffn_dropout,
    vocab_size=vocab_size,
    mask_future=True,
    attn_score_dropout=args.attn_score_dropout,
    attn_layer_dropout=args.attn_layer_dropout,
    max_seq_length=args.max_seq_length,
)

log_softmax = TokenClassifier(args.d_model,
                              num_classes=vocab_size,
                              num_layers=1,
                              log_softmax=True)

loss = SmoothedCrossEntropyLoss(pad_id=tokenizer.pad_id,
                                label_smoothing=args.label_smoothing)

# Tie the weights of the embedding and log_softmax layers; the direct
# assignment below is the plain-PyTorch alternative to tie_weights_with:
# log_softmax.mlp.last_linear_layer.weight = encoder.embedding_layer.token_embedding.weight
log_softmax.tie_weights_with(
    encoder,
    weight_names=["mlp.layer0.weight"],
    name2name_and_transform={
        "mlp.layer0.weight":
        ("embedding_layer.token_embedding.weight", WeightShareTransform.SAME)
    },
)
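
As a sanity check (a sketch mirroring the assertions in Example #1, using the parameter names from the tie above), the two views should hold identical values once tied:

import numpy as np

# After tying, the classifier's projection and the encoder's token embedding
# share their values, so the arrays compare equal.
assert np.array_equal(
    log_softmax.mlp.layer0.weight.detach().cpu().numpy(),
    encoder.embedding_layer.token_embedding.weight.detach().cpu().numpy(),
)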