Example #1
    @classmethod
    def build_modalities(cls, model_args, src_meta, trg_meta):
        """ Create source and target modality. """
        # if modality.source.dim is not defined, then use modality.dim as default
        src_dim = model_args["modality.source.dim"] or model_args["modality.dim"]
        # if modality.target.dim is not defined, then use modality.dim as default
        trg_dim = model_args["modality.target.dim"] or model_args["modality.dim"]
        # layer names for the source (audio) and target (symbol) modalities
        input_name = "input_audio_modality"
        target_name = "target_symbol_modality"
        # creates target embedding table
        target_modality = cls.build_modality(
            vocab_size=trg_meta["vocab_size"], emb_dim=trg_dim, name=target_name,
            timing=(model_args["modality.target.timing"] or model_args["modality.timing"]),
            share_embedding_and_softmax_weights=model_args["modality.share_embedding_and_softmax_weights"])

        # creates source audio modality
        input_modality = AudioConv2dSubsamplingLayer(
            embedding_dim=src_dim,
            kernel_size=model_args["modality.source.kernel_size"],
            strides=model_args["modality.source.strides"],
            channels=model_args["modality.source.channels"],
            layer_norm=model_args["modality.source.layer_norm"],
            name=input_name)
        src_timing = model_args["modality.source.timing"] or model_args["modality.timing"]
        if src_timing:
            if isinstance(src_timing, str):
                src_timing = {"timing": src_timing}
            elif not isinstance(src_timing, dict):
                raise ValueError("Unknown type of timing params: {}".format(str(src_timing)))
            input_modality = PositionEmbeddingWrapper(
                embedding_layer=input_modality, name=input_name + "_posenc_wrapper", **src_timing)
        return input_modality, target_modality

    @classmethod
    def build_modality(cls,
                       vocab_size,
                       emb_dim,
                       name,
                       timing=None,
                       share_embedding_and_softmax_weights=False):
        """ Creates modality layer.

        Args:
            vocab_size: An integer, the vocabulary size.
            emb_dim: An integer, the dimension of the embedding.
            name: A string, the layer name.
            timing: A string or a dict of parameters for the positional embedding.
            share_embedding_and_softmax_weights: Whether to share the embedding table and softmax weight.

        Returns:
            A modality layer.
        """
        modality = WordEmbeddingSharedWeights(
            embedding_dim=emb_dim,
            vocab_size=vocab_size,
            share_softmax_weights=share_embedding_and_softmax_weights,
            name=name)
        # position embedding wrapper
        if timing:
            if isinstance(timing, str):
                timing = {"timing": timing}
            elif not isinstance(timing, dict):
                raise ValueError("Unknown type of timing params: {}".format(
                    str(timing)))
            modality = PositionEmbeddingWrapper(embedding_layer=modality,
                                                name=name + "_posenc_wrapper",
                                                **timing)
        return modality
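
For reference, a minimal usage sketch of build_modality follows; the enclosing class name "Transformer" and the vocabulary/embedding sizes are illustrative placeholders, not taken from the project. The timing argument may be either a plain string naming the position-encoding scheme, or a dict of extra keyword arguments forwarded to PositionEmbeddingWrapper.

# Hypothetical call sites; "Transformer" stands in for whichever model class
# defines build_modality, and the sizes are placeholder values.
trg_modality = Transformer.build_modality(
    vocab_size=32000, emb_dim=512, name="target_symbol_modality",
    timing="sinusoids")  # string form: just the timing type

src_modality = Transformer.build_modality(
    vocab_size=32000, emb_dim=512, name="input_symbol_modality",
    timing={"timing": "emb", "max_positions": 512})  # dict form: forwarded as wrapper kwargs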
Example #3
def test_position_embedding():
    tf_postbl = PositionEmbeddingWrapper.add_sinusoids_timing_signal(
        tf.zeros([1, 10, 10]), None)
    pt_postbl = PTPositionEmbeddingWrapper.add_sinusoids_timing_signal(
        torch.zeros(1, 10, 10), None)
    assert_equal_numpy(tf_postbl.numpy(), pt_postbl.detach().numpy())
    emb_dim = 5
    vocab_size = 10
    tf_emb = WordEmbeddingSharedWeights(emb_dim, vocab_size, False)
    pt_emb = PTWordEmbeddingSharedWeights(emb_dim, vocab_size, False)
    inp_2d = numpy.random.randint(0, 9, [2, 5])
    inp_1d = numpy.random.randint(0, 9, [3])
    logits_2d = numpy.random.rand(2, 5)
    logits_3d = numpy.random.rand(2, 4, 5)
    tf_inp_2d = tf.convert_to_tensor(inp_2d, tf.int32)
    tf_inp_1d = tf.convert_to_tensor(inp_1d, tf.int32)
    tf_logits_2d = tf.convert_to_tensor(logits_2d, tf.float32)
    tf_logits_3d = tf.convert_to_tensor(logits_3d, tf.float32)
    pt_inp_2d = torch.IntTensor(inp_2d)
    pt_inp_1d = torch.IntTensor(inp_1d)
    pt_logits_2d = torch.FloatTensor(logits_2d)
    pt_logits_3d = torch.FloatTensor(logits_3d)
    # Build both layers once in "linear" mode, then copy the TF embedding table
    # into the PyTorch layer so the two embeddings are numerically identical.
    _ = tf_emb(tf_logits_2d, mode="linear")
    _ = pt_emb(pt_logits_2d, mode="linear")
    pt_emb._shared_weights.data = torch.Tensor(tf_emb._shared_weights.numpy())
    tf_posemb = PositionEmbeddingWrapper("sinusoids", tf_emb)
    pt_posemb = PTPositionEmbeddingWrapper("sinusoids", pt_emb)
    assert_equal_numpy(
        tf_posemb(tf_logits_2d, mode="linear").numpy(),
        pt_posemb(pt_logits_2d, mode="linear").detach().numpy())
    assert_equal_numpy(
        tf_posemb(tf_logits_3d, mode="linear").numpy(),
        pt_posemb(pt_logits_3d, mode="linear").detach().numpy())
    assert_equal_numpy(
        tf_posemb(tf_inp_2d).numpy(),
        pt_posemb(pt_inp_2d).detach().numpy())
    assert_equal_numpy(
        tf_posemb(tf_inp_1d, time=5).numpy(),
        pt_posemb(pt_inp_1d, time=5).detach().numpy())
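
For context, the table compared at the top of this test is the standard Transformer sinusoidal position encoding. Below is a minimal NumPy sketch of that table, following the tensor2tensor convention; the exact timescale spacing and channel ordering used by add_sinusoids_timing_signal may differ slightly.

import numpy as np

def sinusoid_table(length, channels, min_timescale=1.0, max_timescale=1.0e4):
    # One sine and one cosine channel per timescale, with geometrically spaced timescales.
    position = np.arange(length, dtype=np.float32)
    num_timescales = channels // 2
    log_increment = np.log(max_timescale / min_timescale) / max(num_timescales - 1, 1)
    inv_timescales = min_timescale * np.exp(
        -log_increment * np.arange(num_timescales, dtype=np.float32))
    scaled_time = position[:, None] * inv_timescales[None, :]
    signal = np.concatenate([np.sin(scaled_time), np.cos(scaled_time)], axis=1)
    if channels % 2:  # pad the odd channel with zeros
        signal = np.pad(signal, [[0, 0], [0, 1]])
    return signal  # shape: [length, channels]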
Example #4
    @classmethod
    def new(cls, args: dict, vocab_meta, name=None):
        """ Builds a GPT-2 language model.

        Args:
            args: A dict containing all model parameters.
            vocab_meta: A dict containing vocabulary meta data, e.g. eos_id, vocab_size.
            name: The name of the model.

        Returns:
            A GPT2 model.
        """
        embedding = WordEmbeddingSharedWeights(
            embedding_dim=args["hidden_size"],
            vocab_size=vocab_meta["vocab_size"],
            share_softmax_weights=True,
            use_bias=args["softmax_bias"],
            name="embeddings")
        timing = args["timing"]
        if timing:
            if isinstance(timing, str):
                timing = {"timing": timing}
            elif not isinstance(timing, dict):
                raise ValueError("Unknown type of timing params: {}".format(
                    str(timing)))
            embedding = PositionEmbeddingWrapper(embedding_layer=embedding,
                                                 name="posenc_wrapper",
                                                 **timing)

        decoder = TransformerDecoder(
            num_layers=args["num_layers"],
            hidden_size=args["hidden_size"],
            num_attention_heads=args["num_attention_heads"],
            filter_size=args["filter_size"],
            ffn_activation=args["ffn_activation"],
            attention_dropout_rate=args["attention_dropout_rate"],
            attention_type=args["attention_type"],
            ffn_dropout_rate=args["ffn_dropout_rate"],
            layer_postprocess_dropout_rate=args[
                "layer_postprocess_dropout_rate"],
            layer_postprocess_epsilon=args["layer_postprocess_epsilon"],
            no_cross_attn_layer_list=[i for i in range(args["num_layers"])],
            name="decoder")
        model = cls(args, vocab_meta, embedding, decoder, name=name)
        _ = model({
            "trg":
            tf.convert_to_tensor([[0, 1, 2, vocab_meta["pad_id"]]], tf.int64),
            "trg_input":
            tf.convert_to_tensor([[vocab_meta["bos_id"], 1, 2]], tf.int64),
            "trg_length":
            tf.convert_to_tensor([4], tf.int64)
        })
        return model
Example #5
File: gpt2.py  Project: taktak1/neurst
    @classmethod
    def new(cls, args: dict, vocab_meta, name=None):
        """ Builds a GPT-2 language model.

        Args:
            args: A dict containing all model parameters.
            vocab_meta: A dict containing vocabulary meta data, e.g. eos_id, vocab_size.
            name: The name of the model.

        Returns:
            A GPT2 model.
        """
        # build the token embedding with learned ("emb") position embeddings
        embedding = PositionEmbeddingWrapper(
            embedding_layer=WordEmbeddingSharedWeights(
                embedding_dim=args["hidden_size"],
                vocab_size=vocab_meta["vocab_size"],
                share_softmax_weights=True,
                use_bias=False,
                name="embeddings"),
            name="posenc_wrapper",
            timing="emb",
            max_positions=args["max_position_embeddings"])
        decoder = TransformerDecoder(
            num_layers=args["num_layers"],
            hidden_size=args["hidden_size"],
            num_attention_heads=args["num_attention_heads"],
            filter_size=args["filter_size"],
            ffn_activation=args["ffn_activation"],
            attention_dropout_rate=args["attention_dropout_rate"],
            attention_type=args["attention_type"],
            ffn_dropout_rate=args["ffn_dropout_rate"],
            layer_postprocess_dropout_rate=args[
                "layer_postprocess_dropout_rate"],
            layer_postprocess_epsilon=args["layer_postprocess_epsilon"],
            with_encoder_decoder_attention=False,
            name="decoder")
        model = cls(args, vocab_meta, embedding, decoder, name=name)
        _ = model({"tokens": tf.convert_to_tensor([[0, 1, 2]], tf.int64)})
        return model
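
For reference, here is a hedged sketch of the args dict this builder reads. The key names come from the code above, while the hyper-parameter values, the vocabulary size, and the class name GPT2 are illustrative assumptions; the vocabulary metadata may need more entries (e.g. eos_id) depending on what the model class itself requires.

# Placeholder hyper-parameter values; only the key names are taken from the code above.
args = {
    "hidden_size": 768,
    "num_layers": 12,
    "num_attention_heads": 12,
    "filter_size": 3072,
    "ffn_activation": "gelu",
    "attention_type": "dot_product",
    "attention_dropout_rate": 0.1,
    "ffn_dropout_rate": 0.1,
    "layer_postprocess_dropout_rate": 0.1,
    "layer_postprocess_epsilon": 1e-6,
    "max_position_embeddings": 1024,
}
model = GPT2.new(args, vocab_meta={"vocab_size": 50257}, name="gpt2")  # assumed class name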
Example #6
def test_position_embedding():
    embedding_layer = WordEmbeddingSharedWeights(embedding_dim=5,
                                                 vocab_size=10,
                                                 share_softmax_weights=False)
    embedding_layer = PositionEmbeddingWrapper(
        timing="sinusoids",
        embedding_layer=embedding_layer,
    )
    inputs1d = tf.convert_to_tensor([4, 7, 8], tf.int32)
    inputs2d = tf.convert_to_tensor([[3, 1, 1, 1], [8, 1, 6, 4], [6, 6, 0, 5]],
                                    tf.int32)
    # Call once to build the layer so its single weight matrix can be set below.
    _ = embedding_layer(inputs2d)
    assert len(embedding_layer.get_weights()) == 1
    assert "emb/weights" in embedding_layer.trainable_weights[0].name
    embedding_layer.set_weights([
        numpy.array(
            [[-0.22683287, 0.20732224, -0.10953838, 0.15318757, -0.07203472],
             [0.48726183, 0.53683335, 0.38046378, -0.42776877, 0.51263684],
             [-0.20618078, 0.43871957, 0.26764846, 0.57276505, -0.13321346],
             [0.34847826, 0.1998071, 0.48136407, -0.03138721, -0.5397158],
             [-0.31466845, 0.24504018, 0.38156456, -0.03245735, 0.28105468],
             [-0.4769836, -0.2763745, -0.35024986, 0.5304734, -0.2523746],
             [0.13987714, -0.36480358, 0.5633767, 0.04371119, -0.5429846],
             [0.07482189, 0.4224295, 0.5645891, -0.12718052, 0.3637674],
             [0.4379062, 0.11231863, -0.6134181, -0.53932106, -0.5402442],
             [-0.18054467, -0.21964127, -0.14727849, 0.61441237, -0.13402274]])
    ])

    emb_for_2d = embedding_layer(inputs2d)
    emb_for_1d = embedding_layer(inputs1d, time=3)
    assert numpy.sum((emb_for_2d.numpy() - numpy.array(
        [[[0.77922106, 0.4467823, 2.0763628, 0.92981607, -1.2068413],
          [1.9310216, 1.2004958, 1.3910451, 0.04347992, 1.1462909],
          [1.998848, 1.2005959, 0.43459606, 0.04347992, 1.1462909],
          [1.2306706, 1.2006959, -0.13924962, 0.04347986, 1.1462909]],
         [[0.9791881, 0.2511521, -0.37164462, -0.2059586, -1.2080228],
          [1.9310216, 1.2004958, 1.3910451, 0.04347992, 1.1462909],
          [1.2220722, -0.81552565, 0.8436019, 1.0977412, -1.2141505],
          [-0.56250006, 0.5482265, -0.13678819, 0.9274231, 0.62845737]],
         [[0.3127748, -0.8157256, 2.2597487, 1.0977412, -1.2141505],
          [1.1542457, -0.8156256, 1.800051, 1.0977412, -1.2141505],
          [0.4020837, 0.46378663, -0.6610821, 1.3425379, -0.16107452],
          [-0.92544776, -0.6176922, -1.773175, 2.1861746, -0.56432676]]]))**
                     2) < 1e-9
    assert numpy.sum((emb_for_1d.numpy() - numpy.array(
        [[-0.56250006, 0.5482265, -0.13678819, 0.9274231, 0.62845737],
         [0.30842686, 0.9448811, 0.27246714, 0.71561563, 0.8134086],
         [1.120308, 0.2514521, -2.361637, -0.20595866, -1.2080228]]))**
                     2) < 1e-9

    emb_shared_layer = WordEmbeddingSharedWeights(embedding_dim=5,
                                                  vocab_size=10,
                                                  share_softmax_weights=True)
    emb_shared_layer = PositionEmbeddingWrapper(
        timing="emb", embedding_layer=emb_shared_layer)
    emb_for_2d = emb_shared_layer(inputs2d)
    # "linear" mode reuses the shared embedding table (plus bias) as the output projection.
    logits_for_2d = emb_shared_layer(emb_for_2d, mode="linear")
    assert len(emb_shared_layer.get_weights()) == 3  # shared table, softmax bias, learned position table
    for w in emb_shared_layer.trainable_weights:
        if "shared/weights" in w.name:
            weights = w
        elif "shared/bias" in w.name:
            bias = w
    assert numpy.sum(
        (numpy.dot(emb_for_2d.numpy(), numpy.transpose(weights.numpy())) +
         bias.numpy() - logits_for_2d.numpy())**2) < 1e-9