Example #1
    def __init__(self, embed_size, cate_dim, args):
        super(NeRTModel, self).__init__()

        hidden_size = args.hidden_size
        num_layers = args.num_layers

        self.rnn = nn.LSTM(embed_size,
                           int(hidden_size / 2),
                           num_layers,
                           batch_first=True,
                           bidirectional=True)
        #self.rnn = nn.LSTM(embed_size, hidden_size, num_layers, batch_first=True)
        #self.mlp = nn.Linear(hidden_size*2, embed_size)
        self.mlp = nn.Sequential(
            nn.Linear(hidden_size * 2, hidden_size * 11 // 10),
            nn.ReLU(inplace=True),
            nn.Linear(hidden_size * 11 // 10, hidden_size * 8 // 10),
            nn.ReLU(inplace=True),
            nn.Linear(hidden_size * 8 // 10, hidden_size * 7 // 10),
            nn.ReLU(inplace=True),
            nn.Linear(hidden_size * 7 // 10, hidden_size * 6 // 10),
            nn.ReLU(inplace=True),
            nn.Linear(hidden_size * 6 // 10, embed_size),
        )

        self._dropout = nn.Dropout(0.3)

        self._W_attn = nn.Parameter(torch.zeros([hidden_size * 2, 1],
                                                dtype=torch.float32),
                                    requires_grad=True)
        self._b_attn = nn.Parameter(torch.zeros([1], dtype=torch.float32),
                                    requires_grad=True)

        self._mha = nn.MultiheadAttention(embed_size, 20 if
                                          (embed_size % 20) == 0 else 10)
        self._mlp_mha = nn.Linear(embed_size, hidden_size)

        nn.init.xavier_normal_(self._W_attn.data)

        self._rnn_hidden_size = hidden_size
Example #2
 def __init__(self,
              n_stacks,
              n_dims,
              n_heads,
              seq_len,
              n_multihead=1,
              dropout=0.0):
     super(StackedNMultiHeadAttention, self).__init__()
     self.n_stacks = n_stacks
     self.n_multihead = n_multihead
     self.n_dims = n_dims
     self.norm_layers = nn.LayerNorm(n_dims)
     # Each of the n_stacks blocks gets its own set of n_multihead attention layers.
     # List comprehensions are used so every layer is a distinct module;
     # `n_stacks * [module]` would repeat the same instance and share its weights.
     self.multihead_layers = nn.ModuleList([
         nn.ModuleList([
             nn.MultiheadAttention(
                 embed_dim=n_dims, num_heads=n_heads, dropout=dropout)
             for _ in range(n_multihead)
         ])
         for _ in range(n_stacks)
     ])
     self.ffn = nn.ModuleList([FFN(n_dims) for _ in range(n_stacks)])
     self.mask = torch.triu(torch.ones(seq_len, seq_len),
                            diagonal=1).to(dtype=torch.bool)
Example #3
 def __init__(self,
              input_dim,
              hidden_dim,
              ff_dim=2048,
              heads=1,
              max_len=5000,
              dropout=0,
              rnn=False,
              attn=False,
              residual=False):
     super().__init__()
     self.attn = nn.MultiheadAttention(input_dim, heads,
                                       dropout=dropout) if attn else None
     self.ln = BlockNorm(input_dim)
     self.dropout = nn.Dropout(dropout)
     self.rnn = nn.LSTM(input_dim, input_dim) if rnn else None
     self.ff = Boom(input_dim,
                    dropout=dropout,
                    hidden_dim=int(ff_dim * 2),
                    shortcut=True)
     self.residual = residual
     self.max_len = max_len
Example #4
    def __init__(self, n_skill, max_seq=100, embed_dim=128):
        super(SAKTModel, self).__init__()
        self.n_skill = n_skill
        self.embed_dim = embed_dim

        self.embedding = nn.Embedding(2 * n_skill + 1, embed_dim)
        self.pos_embedding1 = nn.Embedding(max_seq - 1, embed_dim)
        self.pos_embedding2 = nn.Embedding(max_seq - 1, embed_dim)
        self.tsdiff_embedding = nn.Embedding(301, embed_dim)
        self.elptime_embedding = nn.Embedding(301, embed_dim)
        self.e_embedding = nn.Embedding(n_skill + 1, embed_dim)
        self.part_embedding = nn.Embedding(8, embed_dim)

        self.multi_att = nn.MultiheadAttention(embed_dim=embed_dim,
                                               num_heads=8,
                                               dropout=0.2)

        self.dropout = nn.Dropout(0.2)
        self.layer_normal = nn.LayerNorm(embed_dim)

        self.ffn = FFN(embed_dim)
        self.pred = nn.Linear(embed_dim, 1)
Example #5
    def __init__(self,
                 n_skill,
                 max_seq=100,
                 embed_dim=128,
                 num_heads=8,
                 dropout=0.2):
        super(SAKTModel, self).__init__()
        self.n_skill = n_skill
        self.embed_dim = embed_dim

        self.embedding = nn.Embedding(2 * n_skill + 1, embed_dim)
        self.pos_embedding_enc = nn.Embedding(max_seq - 1, embed_dim)
        self.pos_embedding_dec = nn.Embedding(max_seq - 1, embed_dim)
        self.e_embedding = nn.Embedding(n_skill + 1, embed_dim)
        self.part_embedding = nn.Embedding(8, embed_dim)
        self.elapsed_time_embedding = nn.Embedding(302, embed_dim)
        self.duration_previous_content_embedding = nn.Embedding(302, embed_dim)

        self.multi_att_enc_self1 = SelfAttentionLayer(embed_dim=embed_dim,
                                                      num_heads=num_heads,
                                                      dropout=dropout)
        self.multi_att_enc_self2 = SelfAttentionLayer(embed_dim=embed_dim,
                                                      num_heads=num_heads,
                                                      dropout=dropout)
        self.multi_att_dec_self1 = SelfAttentionLayer(embed_dim=embed_dim,
                                                      num_heads=num_heads,
                                                      dropout=dropout)
        self.multi_att_dec_self2 = SelfAttentionLayer(embed_dim=embed_dim,
                                                      num_heads=num_heads,
                                                      dropout=dropout)
        self.multi_att_dec = nn.MultiheadAttention(embed_dim=embed_dim,
                                                   num_heads=num_heads,
                                                   dropout=dropout)

        self.dropout = nn.Dropout(0.2)
        self.layer_normal = nn.LayerNorm(embed_dim)

        self.ffn = FFN(embed_dim)
        self.pred = nn.Linear(embed_dim, 1)
Example #6
    def __init__(self, config:dict) -> None:
        super(Model, self).__init__()
        self.word_embedding = config["embedding"]
        # Freeze or fine-tune the embedding weights according to the config.
        self.word_embedding.requires_grad_(not config["freeze_embedding"])

        self.word_encoder = DynamicRNN(config["embedding_dim"],
                                       hidden_size=config["text_hidden_size"],
                                       num_layers=config["text_layers"],
                                       dropout=config["dropout"],
                                       bias_init=config["bias_init"],
                                       batch_first=True,
                                       bidirectional=True,
                                       rnn_type="GRU")
        
        self.word_output_size = config["text_hidden_size"] * config["text_layers"] * 2

        self.img_fc = nn.Linear(config["img_input_size"], self.word_output_size)
        self.tanh1 = nn.Tanh()
        self.attn = nn.MultiheadAttention(self.word_output_size, config["attention_nhead"])

        self.fusion_encoder = MultiheadAttentionEncoder(self.word_output_size, config["fusion_nheads"], config["uniform_bound"])

        self.output_layer = OutputLayer(config["task"], self.word_output_size,
                             config["output_size"], config["dropout"])
Example #7
    def __init__(self,
                 d_model: int,
                 nhead: int,
                 dim_feedforward: int = 2048,
                 dropout: float = 0.1,
                 activation: str = "relu",
                 normalize_before: bool = True) -> None:
        super(TransformerEncoderLayer, self).__init__()
        self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=0.0)
        # Implementation of Feedforward model
        self.linear1 = nn.Linear(d_model, dim_feedforward)
        self.dropout = nn.Dropout(dropout)
        self.linear2 = nn.Linear(dim_feedforward, d_model)

        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)

        self.activation = _get_activation_fn(activation)

        self.normalize_before = normalize_before
Example #8
    def __init__(self,
                 d_model=512,
                 nhead=8,
                 dim_feedforward=2048,
                 dropout=0.1,
                 activation="relu"):
        super(TransformerEncoderLayer, self).__init__()
        # With nhead attention heads, each head works on d_model // nhead dimensions;
        # nn.MultiheadAttention handles that split internally (see the shape sketch after this example).
        self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout)

        # The feed-forward block is simply two linear layers mapping
        # d_model -> dim_feedforward -> d_model (512 -> 2048 -> 512 with the defaults).
        self.linear1 = nn.Linear(d_model, dim_feedforward)
        self.dropout = nn.Dropout(dropout)
        self.linear2 = nn.Linear(dim_feedforward, d_model)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)

        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)

        self.activation = _get_activation_fn(activation)
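
For context on the comments above, here is a minimal, self-contained shape sketch (not taken from the repository this example comes from) of how nn.MultiheadAttention consumes its inputs: with the default layout it expects (seq_len, batch, embed_dim) tensors, and with d_model=512 and nhead=8 each head internally attends over 512 // 8 = 64 dimensions.

    import torch
    import torch.nn as nn

    d_model, nhead, seq_len, batch = 512, 8, 10, 4
    attn = nn.MultiheadAttention(d_model, nhead, dropout=0.1)

    # Default layout is (seq_len, batch, embed_dim); pass batch_first=True for batch-first tensors.
    x = torch.randn(seq_len, batch, d_model)
    out, weights = attn(x, x, x)   # self-attention: query = key = value
    print(out.shape)               # torch.Size([10, 4, 512])
    print(weights.shape)           # averaged over heads: torch.Size([4, 10, 10])

The linear1/linear2 pair in the layer above then acts position-wise on the last dimension of out, mapping 512 -> 2048 -> 512.
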
Example #9
 def __init__(self,
              input_dim,
              lstm_hidden_dim,
              lstm_layers,
              lstm_dropout,
              word_pad_idx,
              attn_heads=None,
              attn_dropout=None):
     super().__init__()
     self.word_pad_idx = word_pad_idx
     #biLSTM layer
     self.lstm = nn.LSTM(input_size=input_dim,
                         hidden_size=lstm_hidden_dim,
                         num_layers=lstm_layers,
                         bidirectional=True,
                         dropout=lstm_dropout if lstm_layers > 1 else 0)
     #attention layer
     self.attn_heads = attn_heads
     if self.attn_heads:
         self.attn = nn.MultiheadAttention(embed_dim=lstm_hidden_dim * 2,
                                           num_heads=attn_heads,
                                           dropout=attn_dropout)
Example #10
    def __init__(self, config, bert_hidden_states=4, num_heads=1, dropout=0.1):
        config = deepcopy(config)
        config.output_hidden_states = True
        super(BertForQuestionAnswering2, self).__init__(config)

        self.num_labels = config.num_labels
        self.bert_hidden_states = bert_hidden_states

        self.bert = BertModel(config)
        #config.num_labels = 1
        num_labels = 1

        self.qa_outputs = nn.Linear(
            config.hidden_size * self.bert_hidden_states * 2, num_labels)
        self.qa_attn = nn.MultiheadAttention(config.hidden_size *
                                             self.bert_hidden_states,
                                             num_heads=num_heads,
                                             dropout=dropout)
        self.sm = nn.Sigmoid()

        self.init_weights()
        self.qa_outputs.bias.data.fill_(1.0)
Example #11
    def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1,
                 activation="relu", normalize_before=False, faster=False, use_linear_attention=False):
        super().__init__()
        self.faster = faster
        self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout)
        # Implementation of Feedforward model
        if self.faster:
            self.linear1 = nn.Linear(d_model, dim_feedforward // 4)
            self.dropout = nn.Dropout(dropout, inplace=True)
            self.linear2 = nn.Linear(dim_feedforward // 4, d_model)
        else:
            self.linear1 = nn.Linear(d_model, dim_feedforward)
            self.dropout = nn.Dropout(dropout, inplace=True)
            self.linear2 = nn.Linear(dim_feedforward, d_model)

        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout1 = nn.Dropout(dropout, inplace=True)
        self.dropout2 = nn.Dropout(dropout, inplace=True)

        self.activation = _get_activation_fn(activation)
        self.normalize_before = normalize_before
Example #12
 def __init__(self, config):
     super().__init__()
     self.config = config
     self.att = nn.MultiheadAttention(
         embed_dim=config.n_embd,
         num_heads=config.n_head,
         dropout=config.attn_pdrop,
     )
     
     assert config.n_embd % config.n_head == 0
     # key, query, value projections for all heads
     self.key = nn.Linear(config.n_embd, config.n_embd)
     self.query = nn.Linear(config.n_embd, config.n_embd)
     self.value = nn.Linear(config.n_embd, config.n_embd)
     # regularization
     self.resid_drop = nn.Dropout(config.resid_pdrop)
     # output projection
     self.proj = nn.Linear(config.n_embd, config.n_embd)
     # causal mask to ensure that attention is only applied to the left in the input sequence
     # Here dtype=bool, torch.triu instead of tril, and diagonal=1 are VERY important:
     # for a boolean attn_mask, True marks positions that may NOT be attended to
     # (see the illustration after this example).
     mask_mat = torch.ones(config.block_size, config.block_size, dtype=torch.bool)
     self.register_buffer("mask", torch.triu(mask_mat, diagonal=1)) 
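
As a small illustration of why the comment above stresses bool, triu, and diagonal=1 (a standalone sketch, not part of the original model): when attn_mask is a boolean tensor, nn.MultiheadAttention treats True as "this position may not be attended to", so the strict upper triangle hides future tokens while the diagonal keeps each token visible to itself.

    import torch
    import torch.nn as nn

    block_size, n_embd, n_head = 4, 8, 2
    mask = torch.triu(torch.ones(block_size, block_size, dtype=torch.bool), diagonal=1)
    # mask:
    # [[False,  True,  True,  True],
    #  [False, False,  True,  True],
    #  [False, False, False,  True],
    #  [False, False, False, False]]

    att = nn.MultiheadAttention(embed_dim=n_embd, num_heads=n_head)
    x = torch.randn(block_size, 1, n_embd)      # (seq_len, batch, embed_dim)
    out, w = att(x, x, x, attn_mask=mask)
    print(w[0])                                  # row i has zero weight on columns j > i

Had torch.tril been used, or a float mask of ones, the wrong positions would be affected: a float attn_mask is added to the attention logits, so it would need -inf in the forbidden entries rather than 1.
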
Example #13
    def __init__(self, dim_model, heads_en, total_ex, total_cat, total_tg,
                 seq_len):
        super().__init__()
        self.seq_len = seq_len - 1
        self.total_cat = total_cat
        self.embd_ex = nn.Embedding(total_ex, embedding_dim=dim_model)
        # self.embd_cat = nn.Embedding(total_cat + 1, embedding_dim=dim_model)
        self.embd_tg = nn.Embedding(total_tg + 1, embedding_dim=dim_model)
        self.embd_pos = nn.Embedding(seq_len, embedding_dim=dim_model)
        self.pos_norm = nn.LayerNorm(dim_model, eps=1.0e-12)
        self.dt_fc = nn.Linear(1, dim_model, bias=False)
        self.cat_fc = nn.Linear(total_cat + 1, dim_model)
        self.cate_proj = nn.Sequential(
            nn.Linear(dim_model * 5, dim_model),
            nn.LayerNorm(dim_model),
        )

        self.multi_en = nn.MultiheadAttention(embed_dim=dim_model,
                                              num_heads=heads_en)
        self.ffn_en = Feed_Forward_block(dim_model)
        self.layer_norm1 = nn.LayerNorm(dim_model)
        self.layer_norm2 = nn.LayerNorm(dim_model)
Example #14
    def __init__(self,
                 d_model,
                 nhead,
                 dim_feedforward=2048,
                 dropout=0.1,
                 activation="relu",
                 normalize_before=False):
        """
        
        Parameters
        ----------
        d_model : {int, scalar} dim of the model input and output(default: 512)

        nhead : {int, scalar} parallel attention heads(default: 8)

        dim_feedforward : {int, scalar} FFN layer hidden neurons(default: 2048)

        dropout : {float, scalar} a Dropout layer on attn_output_weights.(default: 0.1)

        activation : {str-like, scalar} ("relu"(default), "gelu", "glu")

        normalize_before : {bool, scalar} False(default), Norm Layer whether before SA or FFN
        """
        super().__init__()

        self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout)
        # Implementation of Feedforward model
        self.linear1 = nn.Linear(d_model, dim_feedforward)
        self.dropout = nn.Dropout(dropout)
        self.linear2 = nn.Linear(dim_feedforward, d_model)

        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)

        self.activation = _get_activation_fn(activation)
        self.normalize_before = normalize_before
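
To make normalize_before concrete, the following schematic (a sketch with a hypothetical class name, not the repository's actual forward) shows how the flag typically changes the self-attention sub-block: pre-norm normalizes the input before attention, post-norm (the default here) normalizes after the residual addition. The same pattern applies to the feed-forward sub-block with norm2 and dropout2.

    import torch
    import torch.nn as nn

    class SABlock(nn.Module):
        """Sketch of the self-attention sub-block with both norm placements."""
        def __init__(self, d_model=256, nhead=8, dropout=0.1, normalize_before=False):
            super().__init__()
            self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout)
            self.norm1 = nn.LayerNorm(d_model)
            self.dropout1 = nn.Dropout(dropout)
            self.normalize_before = normalize_before

        def forward(self, src):
            if self.normalize_before:
                # Pre-norm: norm -> attention -> dropout -> residual add
                src2 = self.norm1(src)
                src2, _ = self.self_attn(src2, src2, src2)
                return src + self.dropout1(src2)
            # Post-norm (default): attention -> dropout -> residual add -> norm
            src2, _ = self.self_attn(src, src, src)
            return self.norm1(src + self.dropout1(src2))

    x = torch.randn(10, 4, 256)                       # (seq_len, batch, d_model)
    print(SABlock(normalize_before=True)(x).shape)    # torch.Size([10, 4, 256])
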
Example #15
    def __init__(self, language, device, embed_dim, hidden_dim, num_pos,
                 num_embed, num_heads, num_layers, dropout, n_classes):
        super().__init__()
        self.device = device
        self.language = language
        self.max_seq_len = num_pos

        # self.w_embedding = nn.Embedding(self.language.n_words, embed_dim)
        # GloVe vectors are 300-dimensional, so embed_dim is overridden here.
        embed_dim = 300
        self.w_embedding = glove_embeddings(trainable=True)
        self.pos_embeddings = nn.Embedding(num_pos, embed_dim)

        # self.attention = TaskAttention(device, dropout)
        # self.t_embedding = nn.Embedding(num_layers, embed_dim)
        # self.t_embedding.requires_grad = False
        # self.ff_embedding = nn.Embedding(num_layers, embed_dim)
        # self.ff_embedding.requires_grad = False

        self.dropout = nn.Dropout(dropout)

        self.mhas = nn.ModuleList()
        self.ff = nn.ModuleList()
        self.ln_1, self.ln_2 = nn.ModuleList(), nn.ModuleList()
        self.tasks = []

        for i in range(num_layers):
            self.mhas.append(
                nn.MultiheadAttention(embed_dim, num_heads, dropout=dropout))
            # self.mhas.append(nn.MultiheadAttention(embed_dim, num_heads, dropout=0))
            self.ff.append(
                nn.Sequential(nn.Linear(embed_dim, hidden_dim), nn.ReLU(),
                              nn.Linear(hidden_dim, embed_dim)))
            self.ln_1.append(nn.LayerNorm(embed_dim, eps=1e-12))
            self.ln_2.append(nn.LayerNorm(embed_dim, eps=1e-12))
            self.tasks.append(i)

        self.classify = nn.Linear(embed_dim, n_classes)
Example #16
    def __init__(self,
                 num_conv_blocks,
                 kernel_size,
                 num_heads=4,
                 d_model=128,
                 dropout=0.1,
                 device="cuda:0"):

        super(StackedEncoder, self).__init__()
        # self.pos_encoder = PositionalEncoding(d_model, dropout, device)
        # self.pos_norm = nn.LayerNorm(d_model)

        self.conv_blocks = nn.ModuleList([
            DepthwiseSeparableConv(d_model, d_model, kernel_size)
            for _ in range(num_conv_blocks)
        ])
        self.conv_norm = nn.ModuleList(
            [nn.LayerNorm(d_model) for _ in range(num_conv_blocks)])

        self.self_attn_block = nn.MultiheadAttention(d_model, num_heads,
                                                     dropout)
        # self.ffn_block = FFNBlock(d_model)

        self.ffn_1 = Initialized_Conv1d(d_model, d_model, relu=True, bias=True)
        self.ffn_1_norm = nn.LayerNorm(d_model)
        self.ffn_2 = Initialized_Conv1d(d_model, d_model, bias=True)
        self.ffn_2_norm = nn.LayerNorm(d_model)
        '''self.conv_norm = nn.ModuleList([nn.LayerNorm(d_model) for _ in range(num_conv_blocks)])

       # self.self_attn_block = nn.MultiheadAttention(d_model, num_heads, dropout)
        self.self_attn_block = MultiheadAttentionLayer(
            d_model, num_heads, device)
        self.ffn_block = nn.Linear(d_model, d_model)
        self.ffn_norm = nn.LayerNorm(d_model)'''

        self.num_conv_blocks = num_conv_blocks

        self.dropout = dropout
Example #17
    def __init__(self, hidden_dim, filter_size, dropout_rate, vocab_size, embedding_dim, pre_trained_embedding=None):
        super().__init__()

        self.hidden_dim = hidden_dim
        self.filter_size = filter_size
        self.dropout_rate = dropout_rate
        self.embedding_dim = embedding_dim

        if pre_trained_embedding is None:
            self.vocab_size = vocab_size
            self.embedding = nn.Embedding(self.vocab_size, self.embedding_dim, padding_idx=0)
        else:
            self.embedding = nn.Embedding.from_pretrained(pre_trained_embedding, freeze=False, padding_idx=0)
        self.self_attention1 = nn.MultiheadAttention(self.embedding_dim, 4)
        self.layer_norm1 = nn.LayerNorm(self.embedding_dim)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(self.dropout_rate)
        self.conv1d = nn.Conv1d(self.embedding_dim, self.hidden_dim, self.filter_size)
        self.bi_rnn = nn.LSTM(self.hidden_dim, int(self.hidden_dim / 2), batch_first=False, bidirectional=True)
        self.uni_rnn = nn.LSTM(self.hidden_dim, self.hidden_dim, batch_first=False)
        self.max_pool = nn.AdaptiveAvgPool2d((1, self.hidden_dim))
        self.linear = nn.Linear(self.hidden_dim, 1)
        self.sigmoid = nn.Sigmoid()
Example #18
 def test_auto_wrap_preset_force_leaf(self, wrap_method):
     """
     Test to ensure force-leaf modules are not wrapped, and children are not wrapped. The
     default_auto_wrap_policy forces leaf modules of type {nn.MultiheadAttention} to not be wrapped
     """
     sequential = nn.Sequential(nn.Linear(10, 10),
                                nn.MultiheadAttention(100, 1))
     my_auto_wrap_policy = functools.partial(default_auto_wrap_policy,
                                             min_num_params=40)
     if wrap_method == WrapMethod.WRAP_API:
         with enable_wrap(wrapper_cls=FSDP,
                          process_group=self.process_group):
             model = auto_wrap(sequential,
                               auto_wrap_policy=my_auto_wrap_policy)
     else:
         assert wrap_method == WrapMethod.FSDP_CTOR
         model = FSDP(sequential,
                      process_group=self.process_group,
                      fsdp_auto_wrap_policy=my_auto_wrap_policy)
     self.assertTrue(isinstance(model.module[0], FSDP))
     # Assert children of multihead attention are not wrapped
     self.assertTrue(isinstance(model.module[1], nn.MultiheadAttention))
     self.assertTrue(isinstance(model.module[1].out_proj, nn.Linear))
Example #19
 def __init__(self, opt, n_node):
     super(StarSessionGraph, self).__init__()
     self.hidden_size = opt.hiddenSize
     self.n_node = n_node
     self.batch_size = opt.batchSize
     self.nonhybrid = opt.nonhybrid
     self.num_heads = opt.heads
     self.embedding = nn.Embedding(self.n_node, self.hidden_size)
     self.gnn = StarGNN(self.hidden_size, step=opt.step)
     self.attn = nn.MultiheadAttention(self.hidden_size, 1)
     self.linear_one = nn.Linear(self.hidden_size, self.hidden_size, bias=True)
     self.linear_two = nn.Linear(self.hidden_size, self.hidden_size, bias=True)
     self.linear_three = nn.Linear(self.hidden_size, self.num_heads, bias=False)
     self.linear_four = nn.Linear(self.hidden_size, self.hidden_size)
     self.linear_transform = nn.Linear(self.hidden_size * (self.num_heads+1), self.hidden_size, bias=True)
     self.layernorm1 = nn.LayerNorm(self.hidden_size)
     self.layernorm2 = nn.LayerNorm(self.hidden_size)
     self.layernorm3 = nn.LayerNorm(self.hidden_size)
     self.loss_function = nn.CrossEntropyLoss()
     # self.loss_function = nn.NLLLoss()
     # self.optimizer = torch.optim.Adam(self.parameters(), lr=opt.lr, weight_decay=opt.l2)
     # self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=opt.lr_dc_step, gamma=opt.lr_dc)
     self.reset_parameters()
Example #20
    def __init__(self, args):
        super(model_a, self).__init__()
        self.args = args

        # Encode state and action
        self.state_embedding = nn.Linear(args.state_dimension,
                                         args.state_embedding_dimension)
        self.action_embedding = nn.Linear(args.action_dimension,
                                          args.action_embedding_dimension)

        # Attention
        state_action_embedding_dim = args.state_embedding_dimension + args.action_embedding_dimension
        self.q_projection = nn.Linear(state_action_embedding_dim,
                                      state_action_embedding_dim)
        self.v_projection = nn.Linear(state_action_embedding_dim,
                                      state_action_embedding_dim)
        self.k_projection = nn.Linear(state_action_embedding_dim,
                                      state_action_embedding_dim)
        self.attention = nn.MultiheadAttention(state_action_embedding_dim,
                                               args.n_heads)

        self.predict = nn.Linear(state_action_embedding_dim,
                                 args.state_dimension)
Example #21
 def __init__(self, params):
     super(UserMemoryEmbedder, self).__init__()
     self.params = params
     self.host = torch.cuda if params["use_gpu"] else torch
     self.categories = ["focus", "database", "memory"]
     self.category_embeds = {}
     for position in self.categories:
         pos_parameter = torch.randn(params["word_embed_size"])
         if params["use_gpu"]:
             pos_parameter = pos_parameter.cuda()
         pos_parameter = nn.Parameter(pos_parameter)
         self.category_embeds[position] = pos_parameter
         # Register the parameter for training/saving.
         self.register_parameter(position, pos_parameter)
     self.category_state = None
     # Project multimodal embedding to same size as encoder.
     input_size = params["asset_feature_size"] + params["word_embed_size"]
     if params["text_encoder"] == "lstm":
         output_size = params["hidden_size"]
     else:
         output_size = params["word_embed_size"]
     self.multimodal_embed_net = nn.Linear(input_size, output_size)
     self.multimodal_attend = nn.MultiheadAttention(output_size, 1)
Example #22
    def __init__(self, language, device, embed_dim, hidden_dim, num_embed,
                 num_pos, num_heads, num_layers, dropout, n_classes):
        super().__init__()
        self.device = device
        self.language = language

        self.encoder = TransformerEmbedder(embed_dim, num_embed, num_pos,
                                           dropout)
        self.dropout = nn.Dropout(dropout)

        self.attentions, self.feed_forwards = nn.ModuleList(), nn.ModuleList()
        self.ln_1, self.ln_2 = nn.ModuleList(), nn.ModuleList()
        for _ in range(num_layers):
            self.attentions.append(
                nn.MultiheadAttention(embed_dim, num_heads, dropout=dropout))
            self.feed_forwards.append(
                nn.Sequential(nn.Linear(embed_dim, hidden_dim), nn.ReLU(),
                              nn.Linear(hidden_dim, hidden_dim), nn.ReLU(),
                              nn.Linear(hidden_dim, embed_dim)))
            self.ln_1.append(nn.LayerNorm(embed_dim, eps=1e-12))
            self.ln_2.append(nn.LayerNorm(embed_dim, eps=1e-12))

        self.classify = nn.Linear(embed_dim, n_classes)
Example #23
    def __init__(self, args):
        super(model_b, self).__init__()
        self.args = args

        # Encode the inputs
        self.observation_embedding = nn.Linear(
            args.observation_dimension, args.observation_embedding_dimension)
        self.action_embedding = nn.Linear(args.action_dimension,
                                          args.action_embedding_dimension)

        # Multi-head attention
        observation_action_embedding_dim = args.observation_embedding_dimension + args.action_embedding_dimension
        self.q_projection = nn.Linear(observation_action_embedding_dim,
                                      observation_action_embedding_dim)
        self.v_projection = nn.Linear(observation_action_embedding_dim,
                                      observation_action_embedding_dim)
        self.k_projection = nn.Linear(observation_action_embedding_dim,
                                      observation_action_embedding_dim)
        self.attention = nn.MultiheadAttention(
            observation_action_embedding_dim, args.n_heads)

        self.predict = nn.Linear(observation_action_embedding_dim,
                                 args.observation_dimension)
Example #24
    def __init__(
        self,
        d_model,
        nhead,
        dim_feedforward=2048,
        dropout=0.1,
        activation="relu",
        normalize_before=False,
    ):
        super().__init__()
        self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout)
        # Implementation of Feedforward model
        self.linear1 = nn.Linear(d_model, dim_feedforward)
        self.dropout = nn.Dropout(dropout)
        self.linear2 = nn.Linear(dim_feedforward, d_model)

        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)

        self.activation = _get_activation_fn(activation)
        assert not normalize_before, "normalize_before is not supported"
Example #25
    def __init__(self,
                 d_model,
                 nhead,
                 dim_feedforward=2048,
                 dropout=0.1,
                 activation="relu",
                 normalize_before=False,
                 return_atten_map=False):
        super().__init__()
        self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout)

        self.linear1 = nn.Linear(d_model, dim_feedforward)
        self.dropout = nn.Dropout(dropout)
        self.linear2 = nn.Linear(dim_feedforward, d_model)

        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)

        self.activation = _get_activation_fn(activation)
        self.normalize_before = normalize_before
        self.return_atten_map = return_atten_map
Example #26
    def __init__(self,
                 input_size,
                 nhead,
                 dim_feedforward=2048,
                 dropout=0.1,
                 activation="relu",
                 normalize_before=False):
        super().__init__()
        self.self_attn = nn.MultiheadAttention(input_size,
                                               nhead,
                                               dropout=dropout)
        # Implementation of Feedforward model
        self.linear1 = nn.Linear(input_size, dim_feedforward)
        self.dropout = nn.Dropout(dropout)
        self.linear2 = nn.Linear(dim_feedforward, input_size)

        self.norm1 = nn.LayerNorm(input_size)
        self.norm2 = nn.LayerNorm(input_size)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)

        self.activation = _get_activation_fn(activation)
        self.normalize_before = normalize_before
Example #27
    def __init__(
        self,
        nhead,
        d_model,
        dropout=0.0,
        bias=True,
        add_bias_kv=False,
        add_zero_attn=False,
        kdim=None,
        vdim=None,
    ):
        super().__init__()

        self.att = nn.MultiheadAttention(
            embed_dim=d_model,
            num_heads=nhead,
            dropout=dropout,
            bias=bias,
            add_bias_kv=add_bias_kv,
            add_zero_attn=add_zero_attn,
            kdim=kdim,
            vdim=vdim,
        )
Example #28
    def __init__(self, d_model=256, d_ffn=1024,
                 dropout=0.1, activation="relu",
                 n_levels=4, n_heads=8, n_points=4):
        super().__init__()

        # cross attention
        self.cross_attn = MSDeformAttn(d_model, n_levels, n_heads, n_points)
        self.dropout1 = nn.Dropout(dropout)
        self.norm1 = nn.LayerNorm(d_model)

        # self attention
        self.self_attn = nn.MultiheadAttention(
            d_model, n_heads, dropout=dropout)
        self.dropout2 = nn.Dropout(dropout)
        self.norm2 = nn.LayerNorm(d_model)

        # ffn
        self.linear1 = nn.Linear(d_model, d_ffn)
        self.activation = _get_activation_fn(activation)
        self.dropout3 = nn.Dropout(dropout)
        self.linear2 = nn.Linear(d_ffn, d_model)
        self.dropout4 = nn.Dropout(dropout)
        self.norm3 = nn.LayerNorm(d_model)
Example #29
    def __init__(self,
                 d_model,
                 nhead,
                 dim_feedforward=2048,
                 dropout=0.1,
                 activation="relu"):
        super().__init__()
        self.multihead_attn = nn.MultiheadAttention(d_model,
                                                    nhead,
                                                    dropout=dropout)
        # Implementation of Feedforward model
        self.linear1 = nn.Linear(d_model, dim_feedforward)
        self.dropout = nn.Dropout(dropout)
        self.linear2 = nn.Linear(dim_feedforward, d_model)

        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.norm3 = nn.LayerNorm(d_model)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)
        self.dropout3 = nn.Dropout(dropout)

        self.activation = _get_activation_fn(activation)
Example #30
    def __init__(self, config, dataset):
        super(AutoInt, self).__init__(config, dataset)

        # load parameters info
        self.attention_size = config['attention_size']
        self.dropout_probs = config['dropout_probs']
        self.n_layers = config['n_layers']
        self.num_heads = config['num_heads']
        self.mlp_hidden_size = config['mlp_hidden_size']
        self.has_residual = config['has_residual']

        # define layers and loss
        self.att_embedding = nn.Linear(self.embedding_size,
                                       self.attention_size)
        self.embed_output_dim = self.num_feature_field * self.embedding_size
        self.atten_output_dim = self.num_feature_field * self.attention_size
        size_list = [self.embed_output_dim] + self.mlp_hidden_size
        self.mlp_layers = MLPLayers(size_list, dropout=self.dropout_probs[1])
        # multi-head self-attention network
        self.self_attns = nn.ModuleList([
            nn.MultiheadAttention(self.attention_size,
                                  self.num_heads,
                                  dropout=self.dropout_probs[0])
            for _ in range(self.n_layers)
        ])
        self.attn_fc = torch.nn.Linear(self.atten_output_dim, 1)
        self.deep_predict_layer = nn.Linear(self.mlp_hidden_size[-1], 1)
        if self.has_residual:
            self.v_res_res_embedding = torch.nn.Linear(self.embedding_size,
                                                       self.attention_size)

        self.dropout_layer = nn.Dropout(p=self.dropout_probs[2])
        self.sigmoid = nn.Sigmoid()
        self.loss = nn.BCELoss()

        # parameters initialization
        self.apply(self._init_weights)