Example #1
    def __init__(self, config, dataset):
        ''' Initialize the Transformer '''
        super(NewTransformer, self).__init__()

        self.dataset = dataset
        self.embedding = TokenEmbedding(
            dataset.vocab_size,
            config.embedding_size,
            padding_idx=self.padding_idx
        )
        self.position_embedding = PositionEmbedding(config.embedding_size)
        self.dropout = nn.Dropout(config.dropout_p, inplace=True)

        # Unique attention attributes
        self.attn_ofs_uniq = list(set(
            config.enc_attn_offset + config.dec_attn_offset + config.enc_dec_attn_offset))
        self.attn_std_uniq = list(set(
            config.enc_attn_std + config.dec_attn_std + config.enc_dec_attn_std))

        # Allow for overriding the encoders and decoders in derived classes
        self.encoders = self.create_encoders(config)
        self.decoders = self.create_decoders(config)

        self.label_smoothing = LabelSmoothingLoss(
            config.label_smoothing or 0,
            ignore_index=self.padding_idx,
            reduction='none'
        )
        self.cross_entropy = nn.CrossEntropyLoss(
            ignore_index=self.padding_idx,
            reduction='none'
        )
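
This snippet (like the later examples) pairs a LabelSmoothingLoss with an unreduced nn.CrossEntropyLoss, but the loss class itself is not part of these snippets. The following is a minimal, self-contained sketch of what a label-smoothing loss with an ignore_index and reduction='none' could look like; the class name and all implementation details are assumptions for illustration, not the project's actual code.

# Hypothetical sketch of a label-smoothing loss; details are assumptions.
import torch
import torch.nn as nn
import torch.nn.functional as F

class LabelSmoothingLossSketch(nn.Module):
    def __init__(self, smoothing=0.1, ignore_index=-100, reduction='none'):
        super().__init__()
        self.smoothing = smoothing
        self.ignore_index = ignore_index
        self.reduction = reduction

    def forward(self, logits, target):
        # logits: (batch, vocab_size), target: (batch,)
        log_probs = F.log_softmax(logits, dim=-1)
        vocab_size = logits.size(-1)
        # Spread smoothing mass over non-gold classes, keep 1 - smoothing on the gold class
        smooth_target = torch.full_like(log_probs, self.smoothing / (vocab_size - 1))
        smooth_target.scatter_(-1, target.unsqueeze(-1).clamp(min=0), 1.0 - self.smoothing)
        loss = -(smooth_target * log_probs).sum(dim=-1)
        # Zero out positions holding the padding index
        loss = loss.masked_fill(target == self.ignore_index, 0.0)
        if self.reduction == 'sum':
            return loss.sum()
        if self.reduction == 'mean':
            return loss.mean()
        return loss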
Example #2
    def __init__(self, config, dataset):
        ''' Initialize the NPLM '''
        super(NPLM, self).__init__()

        self.dataset = dataset
        
        self.adaptive = config.adaptive
        # ngm: number of recent tokens concatenated with their full embeddings
        # wsz: window size averaged over for the long-term context
        self.ngm, self.wsz = config.context_config
        self.long_term_block = 0 if self.ngm > 0 and self.wsz == -1 else \
            (config.batch_length - self.ngm) // self.wsz

        self.dim_concat_embs = (self.ngm + self.long_term_block) * config.embedding_size

        self.embedding = TokenEmbedding(
            dataset.vocab_size,
            config.embedding_size,
            config.model_size,
            config.cutoffs,
            emb_std=config.emb_std,
            proj_std=config.proj_std,
            div_val=config.div_val,
            padding_idx=self.padding_idx,
            do_proj=config.do_proj
        )

        if self.adaptive:
            self.adaptive_softmax = AdaptiveSoftmax(
                self.dataset.vocab_size,
                config.embedding_size,
                config.embedding_size,
                config.cutoffs,
                div_val=config.div_val
            )

            self.tie_weights = config.tie_weights
            self.tie_projs = config.tie_projs

            if self.tie_weights:
                for i in range(len(self.adaptive_softmax.out_layers)):
                    self.adaptive_softmax.out_layers[i].weight = self.embedding.emb_layers[i].weight

            if self.tie_projs:
                for i in range(1, len(self.adaptive_softmax.out_projs)):
                    if config.div_val == 1 and config.model_size != config.embedding_size:
                        self.adaptive_softmax.out_projs[i] = self.embedding.emb_projs[0]
                    elif config.div_val != 1:
                        self.adaptive_softmax.out_projs[i] = self.embedding.emb_projs[i]

        self.layers = self.create_layers(config)
        self.position_embedding = PositionEmbedding(config.model_size) # only used in transformer-N
        self.label_smoothing = LabelSmoothingLoss(
            config.label_smoothing or 0,
            ignore_index=self.padding_idx,
            reduction='none'
        )
        self.cross_entropy = nn.CrossEntropyLoss(
            ignore_index=self.padding_idx,
            reduction='none'
        )

        self.dropout = nn.Dropout(config.dropout_p, inplace=True)

        self.config = config
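
A worked example of the context arithmetic in this snippet, with made-up values for batch_length and embedding_size (the snippet itself does not fix them):

# Worked example of the ngm / wsz bookkeeping above, using made-up numbers.
ngm, wsz, batch_length, embedding_size = 4, 10, 64, 256

long_term_block = 0 if ngm > 0 and wsz == -1 else (batch_length - ngm) // wsz
# (64 - 4) // 10 = 6 averaged blocks of long-term context

dim_concat_embs = (ngm + long_term_block) * embedding_size
# (4 + 6) * 256 = 2560 input features for the concatenated embedding

print(long_term_block, dim_concat_embs)  # 6 2560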
Example #3
    def __init__(self, config, dataset):
        ''' Initialize the Transformer '''
        super(ProbeNewTransformer, self).__init__()

        self.dataset = dataset
        self.span = config.span
        self.embedding = TokenEmbedding(
            dataset.vocab_size,
            config.embedding_size,
            padding_idx=self.padding_idx
        )
        self.position_embedding = PositionEmbedding(config.embedding_size)
        self.dropout = nn.Dropout(config.dropout_p, inplace=True)

        # Allow for overriding the encoders and decoders in derived classes
        self.encoders = type(self).create_encoders(config)
        self.decoders = self.create_decoders(config)

        self.label_smoothing = LabelSmoothingLoss(
            config.label_smoothing or 0,
            ignore_index=self.padding_idx,
            reduction='none'
        )
        self.cross_entropy = nn.CrossEntropyLoss(
            ignore_index=self.padding_idx,
            reduction='none'
        )
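
The recurring comment about allowing the encoders and decoders to be overridden in derived classes describes a template-method style constructor: the base __init__ calls a factory method that subclasses replace. A hypothetical, stripped-down sketch of that pattern; the class names and method bodies below are placeholders, not the project's actual implementations.

import torch.nn as nn

class BaseModel(nn.Module):
    def __init__(self, num_layers, model_size):
        super().__init__()
        # The constructor delegates encoder construction to a factory
        # method so that subclasses can swap in a different stack.
        self.encoders = self.create_encoders(num_layers, model_size)

    def create_encoders(self, num_layers, model_size):
        return nn.ModuleList(nn.Linear(model_size, model_size) for _ in range(num_layers))

class ProbeModel(BaseModel):
    def create_encoders(self, num_layers, model_size):
        # The derived class overrides the factory without touching __init__
        return nn.ModuleList(nn.GRU(model_size, model_size) for _ in range(num_layers))

base = BaseModel(num_layers=2, model_size=16)    # Linear encoders
probe = ProbeModel(num_layers=2, model_size=16)  # GRU encoders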
Example #4
    def __init__(self, config, dataset):
        ''' Initialize the Transformer '''
        super(Transformer, self).__init__()

        self.dataset = dataset
        self.config = config

        self.adaptive = config.adaptive

        self.embedding = TokenEmbedding(dataset.vocab_size,
                                        config.embedding_size,
                                        config.model_size,
                                        config.cutoffs,
                                        emb_std=config.emb_std,
                                        proj_std=config.proj_std,
                                        div_val=config.div_val,
                                        padding_idx=self.padding_idx,
                                        do_proj=config.do_proj)

        if self.adaptive:
            self.adaptive_softmax = AdaptiveSoftmax(self.dataset.vocab_size,
                                                    config.embedding_size,
                                                    config.model_size,
                                                    config.cutoffs,
                                                    div_val=config.div_val)

            self.tie_weights = config.tie_weights
            self.tie_projs = config.tie_projs

            if self.tie_weights:
                for i in range(len(self.adaptive_softmax.out_layers)):
                    self.adaptive_softmax.out_layers[i].weight = \
                        self.embedding.emb_layers[i].weight

            if self.tie_projs:
                for i in range(1, len(self.adaptive_softmax.out_projs)):
                    if config.div_val == 1 and config.model_size != config.embedding_size:
                        self.adaptive_softmax.out_projs[i] = self.embedding.emb_projs[0]
                    elif config.div_val != 1:
                        self.adaptive_softmax.out_projs[i] = self.embedding.emb_projs[i]

        self.position_embedding = PositionEmbedding(config.embedding_size)
        self.dropout = nn.Dropout(config.dropout_p, inplace=True)

        if len(config.no_attention) == 1:
            config.no_attention = config.no_attention * config.num_layers
        assert len(config.no_attention) == config.num_layers

        self.layers = self.create_layers(config)

        self.label_smoothing = LabelSmoothingLoss(
            config.label_smoothing or 0,
            ignore_index=self.padding_idx,
            reduction='none')
        self.cross_entropy = nn.CrossEntropyLoss(ignore_index=self.padding_idx,
                                                 reduction='none')
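
The tie_weights branch above (also present in Example #2) reuses the input embedding matrices as the adaptive-softmax output matrices. A stripped-down illustration of that idea with a plain nn.Embedding and nn.Linear, using made-up sizes:

# Minimal illustration of weight tying: the output projection reuses the
# input embedding matrix, so the two share (and update) the same parameters.
import torch
import torch.nn as nn

vocab_size, model_size = 1000, 32
embedding = nn.Embedding(vocab_size, model_size)
output_layer = nn.Linear(model_size, vocab_size, bias=False)

# Tying: point the output weight at the embedding weight (both are
# (vocab_size, model_size)), exactly as the loops over out_layers do.
output_layer.weight = embedding.weight

assert output_layer.weight.data_ptr() == embedding.weight.data_ptr()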
Example #5
    def __init__(self, config, dataset):
        ''' Initialize the Transformer '''
        super(InterleaveFixedPosEmbEncoderOnlyTransformer, self).__init__()

        self.dataset = dataset
        self.embedding = TokenEmbedding(dataset.vocab_size,
                                        config.embedding_size,
                                        padding_idx=self.padding_idx)
        self.position_embedding = PositionEmbedding(config.embedding_size)
        self.num_layers = config.num_layers

        encoder_positional_embedding_list = []
        for i in range(self.num_layers // 2):
            position_embedding_encoder = LearnedPositionalEmbedding(
                dataset.max_input_length, config.embedding_size,
                self.padding_idx)
            nn.init.normal_(position_embedding_encoder.weight,
                            mean=0,
                            std=config.embedding_size**-0.5)
            if self.padding_idx is not None:
                nn.init.constant_(
                    position_embedding_encoder.weight[self.padding_idx], 0)
            encoder_positional_embedding_list.append(
                position_embedding_encoder)

        self.encoder_positional_embeddings = nn.ModuleList(
            encoder_positional_embedding_list)

        self.position_embedding_decoder = LearnedPositionalEmbedding(
            dataset.max_target_length, config.embedding_size, self.padding_idx)
        nn.init.normal_(self.position_embedding_decoder.weight,
                        mean=0,
                        std=config.embedding_size**-0.5)
        if self.padding_idx is not None:
            nn.init.constant_(
                self.position_embedding_decoder.weight[self.padding_idx], 0)

        self.dropout = nn.Dropout(config.dropout_p, inplace=True)

        # Unique attention attributes
        self.attn_ofs_uniq = list(
            set(config.enc_attn_offset + config.dec_attn_offset +
                config.enc_dec_attn_offset))
        self.attn_std_uniq = list(
            set(config.enc_attn_std + config.dec_attn_std +
                config.enc_dec_attn_std))

        # Allow for overriding the encoders and decoders in derived classes
        self.encoders = self.create_encoders(config)
        self.decoders = self.create_decoders(config)

        self.label_smoothing = LabelSmoothingLoss(
            config.label_smoothing or 0,
            ignore_index=self.padding_idx,
            reduction='none')
        self.cross_entropy = nn.CrossEntropyLoss(ignore_index=self.padding_idx,
                                                 reduction='none')
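
The learned positional embeddings in this last snippet are initialized with a scaled normal distribution (std = embedding_size ** -0.5) and a zeroed padding row. The same recipe applied to a plain nn.Embedding, as a stand-alone sketch with made-up sizes (LearnedPositionalEmbedding itself is not included in these snippets):

# Scaled-normal init with a zeroed padding row, mirroring the snippet above.
import torch.nn as nn

max_length, embedding_size, padding_idx = 512, 256, 0

position_embedding = nn.Embedding(max_length, embedding_size, padding_idx=padding_idx)
# Scaled normal init, matching std = embedding_size ** -0.5
nn.init.normal_(position_embedding.weight, mean=0, std=embedding_size ** -0.5)
# Keep the padding position as an all-zero vector
if padding_idx is not None:
    nn.init.constant_(position_embedding.weight[padding_idx], 0)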