Example #1
    def __init__(self, tag_vocabs, embed, num_layers, d_model, n_head, feedforward_dim, dropout,
                 after_norm=True, attn_type='adatrans', bi_embed=None,
                 fc_dropout=0.3, pos_embed=None, scale=False, dropout_attn=None):

        super().__init__()

        self.embed = embed
        embed_size = self.embed.embed_size
        self.bi_embed = None
        if bi_embed is not None:
            self.bi_embed = bi_embed
            embed_size += self.bi_embed.embed_size

        self.tag_vocabs = []
        self.out_fcs = nn.ModuleList()
        self.crfs = nn.ModuleList()

        for i in range(len(tag_vocabs)):
            self.tag_vocabs.append(tag_vocabs[i])
            out_fc = nn.Linear(1536, len(tag_vocabs[i]))  # note: the head's input size is hard-coded (1536) rather than derived from d_model
            self.out_fcs.append(out_fc)
            trans = allowed_transitions(
                tag_vocabs[i], encoding_type='bioes', include_start_end=True)
            crf = ConditionalRandomField(
                len(tag_vocabs[i]), include_start_end_trans=True, allowed_transitions=trans)
            self.crfs.append(crf)

        self.in_fc = nn.Linear(embed_size, d_model)

        self.transformer = TransformerEncoder(num_layers, d_model, n_head, feedforward_dim, dropout,
                                              after_norm=after_norm, attn_type=attn_type,
                                              scale=scale, dropout_attn=dropout_attn,
                                              pos_embed=pos_embed)

        self.fc_dropout = nn.Dropout(fc_dropout)
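The forward pass for this multi-task variant is not part of the excerpt. As a reference for how each (out_fc, crf) pair is typically driven, here is a minimal, standalone sketch of the fastNLP CRF training/decoding pattern that this constructor sets up; the tag set and tensors are invented for illustration and are not from the original project.

import torch
import torch.nn.functional as F
from fastNLP import Vocabulary
from fastNLP.modules import ConditionalRandomField, allowed_transitions

# Toy BIOES tag vocabulary standing in for one entry of tag_vocabs.
tag_vocab = Vocabulary(padding=None, unknown=None)
tag_vocab.add_word_lst(['O', 'B-PER', 'I-PER', 'E-PER', 'S-PER'])

trans = allowed_transitions(tag_vocab, encoding_type='bioes', include_start_end=True)
crf = ConditionalRandomField(len(tag_vocab), include_start_end_trans=True,
                             allowed_transitions=trans)

feats = torch.randn(2, 6, len(tag_vocab))          # (batch, seq_len, num_tags) emission scores
logits = F.log_softmax(feats, dim=-1)
mask = torch.ones(2, 6, dtype=torch.bool)          # valid-token mask
tags = torch.zeros(2, 6, dtype=torch.long)         # gold tag ids (all 'O' here)

loss = crf(logits, tags, mask).mean()              # per-task training loss
paths, scores = crf.viterbi_decode(logits, mask)   # BIOES-constrained Viterbi decoding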
Example #2
    def __init__(self,
                 config,
                 data_bundle,
                 embed,
                 num_layers,
                 d_model,
                 n_head,
                 feedforward_dim,
                 dropout,
                 after_norm=True,
                 attn_type='adatrans',
                 bi_embed=None,
                 fc_dropout=0.3,
                 pos_embed=None,
                 scale=False,
                 dropout_attn=None):
        """

        :param config: model/experiment configuration object
        :param data_bundle: fastNLP DataBundle; the 'target' tag Vocabulary is read from it
        :param embed: fastNLP TokenEmbedding
        :param num_layers: number of self-attention layers
        :param d_model: input size
        :param n_head: number of head
        :param feedforward_dim: the dimension of ffn
        :param dropout: dropout in self-attention
        :param after_norm: normalization place
        :param attn_type: adatrans, naive
        :param rel_pos_embed: type of position embedding; supports 'sin', 'fix', or None (may be None when relative attention is used)
        :param bi_embed: used in the Chinese scenario
        :param fc_dropout: dropout rate before the fc layer
        """
        super().__init__()
        self.config = config
        self.data_bundle = data_bundle
        tag_vocab = data_bundle.get_vocab('target')
        self.embed = embed
        embed_size = self.embed.embed_size
        self.bi_embed = None
        if bi_embed is not None:
            self.bi_embed = bi_embed
            embed_size += self.bi_embed.embed_size

        self.in_fc = nn.Linear(embed_size, d_model)

        self.transformer = TransformerEncoder(num_layers,
                                              d_model,
                                              n_head,
                                              feedforward_dim,
                                              dropout,
                                              after_norm=after_norm,
                                              attn_type=attn_type,
                                              scale=scale,
                                              dropout_attn=dropout_attn,
                                              pos_embed=pos_embed)
        self.fc_dropout = nn.Dropout(fc_dropout)
        self.out_fc = nn.Linear(d_model, len(tag_vocab))
        trans = allowed_transitions(tag_vocab, include_start_end=True)
        self.crf = ConditionalRandomField(len(tag_vocab),
                                          include_start_end_trans=True,
                                          allowed_transitions=trans)
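The matching forward pass is omitted from this excerpt. Below is a hedged sketch of how a TENER-style model assembled this way usually flows (the function name, argument names, and the padding-index-0 convention are assumptions, not the project's exact code).

import torch
import torch.nn.functional as F

def tener_style_forward(model, chars, target=None, bigrams=None):
    """Assumed flow: embed -> optional bigram concat -> in_fc -> transformer -> fc_dropout -> out_fc -> CRF."""
    mask = chars.ne(0)                                 # assumes padding index 0
    x = model.embed(chars)
    if model.bi_embed is not None:
        x = torch.cat([x, model.bi_embed(bigrams)], dim=-1)
    x = model.in_fc(x)                                 # embed_size -> d_model
    x = model.transformer(x, mask)
    x = model.fc_dropout(x)
    logits = F.log_softmax(model.out_fc(x), dim=-1)
    if target is None:                                 # inference: constrained Viterbi decoding
        paths, _ = model.crf.viterbi_decode(logits, mask)
        return {'pred': paths}
    return {'loss': model.crf(logits, target, mask).mean()}   # training: CRF loss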
Example #3
    def get_network(self):

        return TransformerEncoder(embed_dim=self.embed_dim,
                                  num_heads=self.num_heads,
                                  layers=self.layers,
                                  attn_dropout=self.attn_dropout,
                                  relu_dropout=self.relu_dropout,
                                  res_dropout=self.res_dropout,
                                  attn_mask=self.attn_mask)
Example #4
    def get_encoder_network(self):

        return TransformerEncoder(embed_dim=self.orig_d_l,
                                  num_heads=self.num_heads,
                                  layers=self.layers,
                                  attn_dropout=self.attn_dropout,
                                  relu_dropout=self.relu_dropout,
                                  res_dropout=self.res_dropout,
                                  attn_mask=self.attn_mask,
                                  crossmodal=self.crossmodal)
Example #5
    def __init__(
        self,
        src_embedding,
        tgt_embedding,
        embedding_size,
        hidden_size,
        vocab_size,
        start_index,
        end_index,
        padding_index,
        num_heads,
        num_layers=2,
        dropout=0.2,
        learning_position_embedding=False,
        embedding_scale=False,
        num_positions=1024,
    ):
        super().__init__()

        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.vocab_size = vocab_size
        self.start_index = start_index
        self.end_index = end_index
        self.padding_index = padding_index
        self.dropout = dropout
        self.num_layers = num_layers

        self.encoder = TransformerEncoder(
            num_heads,
            num_layers,
            embedding_size,
            src_embedding,
            hidden_size,
            dropout=dropout,
            learn_position_embedding=learning_position_embedding,
            embedding_scale=embedding_scale,
            num_positions=num_positions)

        output_layer = nn.Sequential(nn.LayerNorm(embedding_size),
                                     nn.Linear(embedding_size, vocab_size))

        self.decoder = TransformerDecoder(
            num_heads,
            num_layers,
            embedding_size,
            hidden_size,
            tgt_embedding,
            start_index,
            end_index,
            output_layer,
            dropout=dropout,
            embedding_scale=embedding_scale,
            learn_positional_embedding=learning_position_embedding,
            num_positions=num_positions)
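For orientation, a construction sketch for this encoder-decoder class; the class name, vocabulary size, and special-token indices below are hypothetical and only illustrate the argument types implied by the signature above.

import torch.nn as nn

vocab_size = 30000                                    # hypothetical
src_embedding = nn.Embedding(vocab_size, 512, padding_idx=0)
tgt_embedding = nn.Embedding(vocab_size, 512, padding_idx=0)

model = Seq2SeqTransformer(                           # hypothetical name for the enclosing class
    src_embedding, tgt_embedding,
    embedding_size=512,
    hidden_size=2048,
    vocab_size=vocab_size,
    start_index=1, end_index=2, padding_index=0,      # hypothetical special-token ids
    num_heads=8, num_layers=2, dropout=0.2)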
Example #6
    def get_transformer(self, layers=-1):

        embed_dim, attn_dropout = self.input_feat_dim, self.attn_dropout

        return TransformerEncoder(embed_dim=embed_dim,
                                  num_heads=self.num_heads,
                                  layers=max(self.layers, layers),
                                  attn_dropout=attn_dropout,
                                  relu_dropout=self.relu_dropout,
                                  res_dropout=self.res_dropout,
                                  embed_dropout=self.embed_dropout,
                                  attn_mask=self.attn_mask)
Example #7
    def __init__(self, embed_dim, num_heads=4, layers=1):
        super(Transformer, self).__init__()
        self.num_heads = num_heads
        self.attn_dropout = 0.1
        self.relu_dropout = 0.1
        self.res_dropout = 0.1
        self.embed_dropout = 0.1
        self.attn_mask = False
        self.layers = layers

        self.transformer = TransformerEncoder(embed_dim=embed_dim,
                                              num_heads=self.num_heads,
                                              layers=self.layers,
                                              attn_dropout=self.attn_dropout,
                                              relu_dropout=self.relu_dropout,
                                              res_dropout=self.res_dropout,
                                              embed_dropout=self.embed_dropout,
                                              attn_mask=self.attn_mask)
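A hypothetical call into this wrapper; the sequence-first (seq_len, batch, embed_dim) layout is an assumption based on the multimodal TransformerEncoder family these examples wrap, not something stated in the excerpt.

import torch

model = Transformer(embed_dim=40, num_heads=4, layers=2)
x = torch.randn(50, 8, 40)            # assumed (seq_len, batch, embed_dim) layout
out = model.transformer(x)            # self-attention only; output keeps the same layout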
Example #8
    def get_network(self, self_type='l', layers=-1):
        if self_type in ['l', 'vl']:
            embed_dim, attn_dropout = self.d_l, self.attn_dropout
        elif self_type in ['v', 'lv']:
            embed_dim, attn_dropout = self.d_v, self.attn_dropout_v
        elif self_type == 'l_mem':
            embed_dim, attn_dropout = self.d_l, self.attn_dropout
        elif self_type == 'v_mem':
            embed_dim, attn_dropout = self.d_v, self.attn_dropout
        else:
            raise ValueError("Unknown network type")

        return TransformerEncoder(embed_dim=embed_dim,
                                  num_heads=self.num_heads,
                                  layers=max(self.layers, layers),
                                  attn_dropout=attn_dropout,
                                  relu_dropout=self.relu_dropout,
                                  res_dropout=self.res_dropout,
                                  embed_dropout=self.embed_dropout,
                                  attn_mask=self.attn_mask)
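In models of this family, get_network is typically called once per branch inside __init__; a hypothetical wiring sketch (the attribute names are assumptions, only the self_type values come from the method above).

        self.trans_l_with_v = self.get_network(self_type='vl')             # cross-modal branches
        self.trans_v_with_l = self.get_network(self_type='lv')
        self.trans_l_mem = self.get_network(self_type='l_mem', layers=3)   # "memory" branches with at least 3 layers
        self.trans_v_mem = self.get_network(self_type='v_mem', layers=3)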
Example #9
    def __init__(self, emb_size: int, n_hidden: int, ff_size: int, n_head: int,
                 n_block: int, dropout: float, beam_size: int,
                 max_decoding_step: int, minimum_length: int,
                 label_smoothing: float, share: bool,
                 vocab: Vocabulary) -> None:

        super().__init__(vocab)

        self.vocab = vocab
        self.vocab_size = vocab.get_vocab_size('tokens')
        self.beam_size = beam_size
        self.max_decoding_step = max_decoding_step
        self.minimum_length = minimum_length
        self.label_smoothing = label_smoothing
        self._bos = self.vocab.get_token_index(START_SYMBOL)
        self._eos = self.vocab.get_token_index(END_SYMBOL)

        if share:
            self.src_embedding = nn.Sequential(
                Embeddings(emb_size, self.vocab_size),
                PositionalEncoding(n_hidden, dropout))
            self.tgt_embedding = self.src_embedding
        else:
            src_vocab_size = vocab.get_vocab_size('src_tokens')
            self.src_embedding = nn.Sequential(
                Embeddings(emb_size, src_vocab_size),
                PositionalEncoding(n_hidden, dropout))
            self.tgt_embedding = nn.Sequential(
                Embeddings(emb_size, self.vocab_size),
                PositionalEncoding(n_hidden, dropout))

        self.encoder = TransformerEncoder(n_hidden, ff_size, n_head, dropout,
                                          n_block)
        self.decoder = TransformerDecoder(n_hidden, ff_size, n_head, dropout,
                                          n_block)

        self.generator = nn.Linear(n_hidden, self.vocab_size)
        self.accuracy = SequenceAccuracy()
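The Embeddings and PositionalEncoding modules used above follow the Annotated-Transformer style; as a reminder of what such a module does, here is a minimal sinusoidal PositionalEncoding sketch (an assumption about the implementation behind the name, not this project's code; assumes an even d_model).

import math
import torch
import torch.nn as nn

class SinusoidalPositionalEncoding(nn.Module):
    """Adds fixed sin/cos position signals to the embeddings, then applies dropout."""

    def __init__(self, d_model, dropout, max_len=5000):
        super().__init__()
        self.dropout = nn.Dropout(dropout)
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float()
                             * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        self.register_buffer('pe', pe.unsqueeze(0))    # (1, max_len, d_model)

    def forward(self, x):                              # x: (batch, seq_len, d_model)
        x = x + self.pe[:, :x.size(1)]
        return self.dropout(x)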
Example #10
    def __init__(self, cfgs):
        super().__init__()
        self.cfgs = cfgs

        feature_enc_layers = eval(cfgs.conv_feature_layers)
        self.embed = feature_enc_layers[-1][0]

        self.feature_extractor = ConvFeatureExtraction(
            conv_layers=feature_enc_layers,
            in_d=1 if cfgs.dataset == 'mitbih' else 12,
            dropout=0.0,
            mode=cfgs.extractor_mode,
            conv_bias=cfgs.conv_bias)

        self.post_extract_proj = (nn.Linear(self.embed, cfgs.embed_dim)
                                  if self.embed != cfgs.embed_dim
                                  and not cfgs.quantize_input else None)

        self.mask_prob = cfgs.mask_prob
        self.mask_selection = cfgs.mask_selection
        self.mask_other = cfgs.mask_other
        self.mask_length = cfgs.mask_length
        self.no_mask_overlap = cfgs.no_mask_overlap
        self.mask_min_space = cfgs.mask_min_space

        self.mask_channel_prob = cfgs.mask_channel_prob
        self.mask_channel_selection = cfgs.mask_channel_selection
        self.mask_channel_other = cfgs.mask_channel_other
        self.mask_channel_length = cfgs.mask_channel_length
        self.no_mask_channel_overlap = cfgs.no_mask_channel_overlap
        self.mask_channel_min_space = cfgs.mask_channel_min_space

        #XXX
        self.dropout_input = nn.Dropout(cfgs.dropout_input)
        self.dropout_features = nn.Dropout(cfgs.dropout_features)

        self.quantizer = None
        self.input_quantizer = None

        self.n_negatives = cfgs.num_negatives
        self.cross_sample_negatives = cfgs.cross_sample_negatives
        self.codebook_negatives = cfgs.codebook_negatives
        self.negatives_from_everywhere = cfgs.negatives_from_everywhere

        self.logit_temp = cfgs.logit_temp

        self.feature_grad_mult = cfgs.feature_grad_mult

        final_dim = cfgs.final_dim if cfgs.final_dim > 0 else cfgs.embed_dim

        if cfgs.quantize_targets:
            vq_dim = cfgs.latent_dim if cfgs.latent_dim > 0 else final_dim
            self.quantizer = GumbelVectorQuantizer(
                dim=self.embed,
                num_vars=cfgs.latent_vars,
                temp=cfgs.latent_temp,
                groups=cfgs.latent_groups,
                combine_groups=False,
                vq_dim=vq_dim,
                time_first=True,
            )
            self.project_q = nn.Linear(vq_dim, final_dim)
        else:
            self.project_q = nn.Linear(self.embed, final_dim)

        if cfgs.quantize_input:
            if cfgs.same_quantizer and self.quantizer is not None:
                vq_dim = final_dim
                self.input_quantizer = self.quantizer
            else:
                vq_dim = cfgs.latent_dim if cfgs.latent_dim > 0 else cfgs.embed_dim
                self.input_quantizer = GumbelVectorQuantizer(
                    dim=self.embed,
                    num_vars=cfgs.latent_vars,
                    temp=cfgs.latent_temp,
                    groups=cfgs.latent_groups,
                    combine_groups=False,
                    vq_dim=vq_dim,
                    time_first=True,
                )
            self.project_inp = nn.Linear(vq_dim, self.embed_dim)

        self.mask_emb = nn.Parameter(
            torch.FloatTensor(cfgs.embed_dim).uniform_())

        self.encoder = TransformerEncoder(cfgs)
        self.layer_norm = LayerNorm(self.embed)

        self.target_glu = None
        if cfgs.target_glu:
            self.target_glu = nn.Sequential(
                nn.Linear(final_dim, final_dim * 2), nn.GLU())

        self.final_proj = nn.Linear(cfgs.embed_dim, final_dim)
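This constructor mirrors a wav2vec 2.0-style setup, where logit_temp scales cosine-similarity logits between the predicted features, the quantized target, and sampled negatives. A minimal sketch of that scoring step follows; the function and tensor names are illustrative, not this project's code.

import torch

def contrastive_logits(pred, pos, negs, logit_temp):
    # pred, pos: (batch, time, dim); negs: (num_negatives, batch, time, dim)
    targets = torch.cat([pos.unsqueeze(0), negs], dim=0)              # true target goes first
    logits = torch.cosine_similarity(pred.float(), targets.float(), dim=-1)
    return logits / logit_temp                                        # (num_negatives + 1, batch, time)

# Cross-entropy against index 0 (the true quantized target) then gives the contrastive loss.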
Example #11
    def __init__(self,
                 tag_vocab,
                 embed,
                 num_layers,
                 d_model,
                 n_head,
                 feedforward_dim,
                 dropout,
                 after_norm=True,
                 attn_type='adatrans',
                 bi_embed=None,
                 fc_dropout=0.3,
                 pos_embed=None,
                 scale=False,
                 dropout_attn=None,
                 use_knowledge=False,
                 feature2count=None,
                 vocab_size=None,
                 feature_vocab_size=None,
                 kv_attn_type="dot",
                 memory_dropout=0.2,
                 fusion_dropout=0.2,
                 fusion_type='concat',
                 highway_layer=0,
                 key_embed_dropout=0.2,
                 knowledge_type="all",
                 use_zen=False,
                 zen_model=None):
        """
        :param tag_vocab: fastNLP Vocabulary
        :param embed: fastNLP TokenEmbedding
        :param num_layers: number of self-attention layers
        :param d_model: input size
        :param n_head: number of head
        :param feedforward_dim: the dimension of ffn
        :param dropout: dropout in self-attention
        :param after_norm: normalization place
        :param attn_type: adatrans, naive
        :param rel_pos_embed: type of position embedding; supports 'sin', 'fix', or None (may be None when relative attention is used)
        :param bi_embed: used in the Chinese scenario
        :param fc_dropout: dropout rate before the fc layer
        :param use_knowledge: whether to use knowledge from Stanford CoreNLP
        :param feature2count: dict of the form {"gram2count": dict, "pos_tag2count": dict, "chunk_tag2count": dict, "dep_tag2count": dict}
        :param
        """
        super().__init__()
        self.use_knowledge = use_knowledge
        self.feature2count = feature2count
        self.vocab_size = vocab_size
        self.feature_vocab_size = feature_vocab_size

        # add ZEN
        self.use_zen = use_zen

        self.embed = embed
        embed_size = self.embed.embed_size
        self.bi_embed = None
        if bi_embed is not None:
            self.bi_embed = bi_embed
            embed_size += self.bi_embed.embed_size

        self.in_fc = nn.Linear(embed_size, d_model)

        self.transformer = TransformerEncoder(num_layers,
                                              d_model,
                                              n_head,
                                              feedforward_dim,
                                              dropout,
                                              after_norm=after_norm,
                                              attn_type=attn_type,
                                              scale=scale,
                                              dropout_attn=dropout_attn,
                                              pos_embed=pos_embed)

        self.kv_memory = KeyValueMemoryNetwork(
            vocab_size=vocab_size,
            feature_vocab_size=feature_vocab_size,
            attn_type=kv_attn_type,
            emb_size=d_model,
            scaled=True,
            key_embed_dropout=key_embed_dropout,
            knowledge_type=knowledge_type)

        self.output_dim = d_model * _dim_map[fusion_type]
        self.fusion = FusionModule(fusion_type=fusion_type,
                                   layer=highway_layer,
                                   input_size=d_model,
                                   output_size=self.output_dim,
                                   dropout=fusion_dropout)

        self.memory_dropout = nn.Dropout(p=memory_dropout)

        self.out_fc = nn.Linear(self.output_dim, len(tag_vocab))

        self.fc_dropout = nn.Dropout(fc_dropout)

        trans = allowed_transitions(tag_vocab, include_start_end=True)
        self.crf = ConditionalRandomField(len(tag_vocab),
                                          include_start_end_trans=True,
                                          allowed_transitions=trans)
Example #12
    def __init__(self,
                 tag_vocab,
                 embed,
                 num_layers,
                 d_model,
                 n_head,
                 feedforward_dim,
                 dropout,
                 after_norm=True,
                 attn_type='adatrans',
                 bi_embed=None,
                 fc_dropout=0.3,
                 pos_embed=None,
                 scale=False,
                 dropout_attn=None,
                 use_knowledge=False,
                 multi_att_dropout=0.3,
                 use_ngram=False,
                 gram2id=None,
                 cat_num=5,
                 device=None):
        """
        :param tag_vocab: fastNLP Vocabulary
        :param embed: fastNLP TokenEmbedding
        :param num_layers: number of self-attention layers
        :param d_model: input size
        :param n_head: number of head
        :param feedforward_dim: the dimension of ffn
        :param dropout: dropout in self-attention
        :param after_norm: normalization place
        :param attn_type: adatrans, naive
        :param rel_pos_embed: type of position embedding; supports 'sin', 'fix', or None (may be None when relative attention is used)
        :param bi_embed: used in the Chinese scenario
        :param fc_dropout: dropout rate before the fc layer
        :param use_knowledge: whether to use knowledge from Stanford CoreNLP
        :param feature2count: dict of the form {"gram2count": dict, "pos_tag2count": dict, "chunk_tag2count": dict, "dep_tag2count": dict}
        :param
        """
        super().__init__()
        self.use_knowledge = use_knowledge
        self.use_ngram = use_ngram
        self.gram2id = gram2id
        self.embed = embed

        # newly added
        self.cat_num = cat_num
        self.use_attention = use_ngram
        embed_size = self.embed.embed_size
        self.bi_embed = None
        if bi_embed is not None:
            self.bi_embed = bi_embed
            embed_size += self.bi_embed.embed_size

        # self.ngram_embeddings = BertWordEmbeddings(hidden_size=embed_size)

        self.in_fc = nn.Linear(embed_size, d_model)
        self.transformer = TransformerEncoder(num_layers,
                                              d_model,
                                              n_head,
                                              feedforward_dim,
                                              dropout,
                                              after_norm=after_norm,
                                              attn_type=attn_type,
                                              scale=scale,
                                              dropout_attn=dropout_attn,
                                              pos_embed=pos_embed)
        self.hidden_size = d_model

        if self.use_attention:
            print("use multi_attention")
            self.multi_attention = MultiChannelAttention(
                len(self.gram2id), self.hidden_size, self.cat_num)
            self.attention_fc = nn.Linear(self.hidden_size * self.cat_num,
                                          self.hidden_size,
                                          bias=False)
            self.multi_att_dropout = nn.Dropout(multi_att_dropout)
            self.out_fc = nn.Linear(self.hidden_size * 2,
                                    len(tag_vocab),
                                    bias=False)

            self.gate = GateConcMechanism(hidden_size=self.hidden_size)
            # self.gete_dropout = nn.Dropout(gate_dropout)

        else:
            self.multi_attention = None
            self.out_fc = nn.Linear(self.hidden_size,
                                    len(tag_vocab),
                                    bias=False)
        # self.out_fc = nn.Linear(d_model, len(tag_vocab))
        # print("len(tag_vocab):  ", len(tag_vocab))
        self.fc_dropout = nn.Dropout(fc_dropout)

        trans = allowed_transitions(tag_vocab, include_start_end=True)
        self.crf = ConditionalRandomField(len(tag_vocab),
                                          include_start_end_trans=True,
                                          allowed_transitions=trans)