示例#1
0
    def __init__(self, hidden_size, dropout_p=0.0):
        """Create the projections, fusion MLP and normalisation layers.

        Registers, in this order: a dropout layer, a last-axis softmax, four
        bias-free ``hidden_size -> hidden_size`` projections
        (``linear_question``, ``linear_video``, ``linear_v_question``,
        ``linear_v_video``), a fusion stack ``linear_final`` mapping
        ``2 * hidden_size -> hidden_size -> hidden_size`` with a ReLU in
        between, and two ``LayerNorm`` modules without learnable affine
        parameters.

        Args:
            hidden_size: Feature width shared by every projection and by
                both layer norms.
            dropout_p: Probability given to ``nn.Dropout`` and forwarded as
                ``weight_dropout`` to every ``WeightDropLinear``.
        """
        super().__init__()

        def weight_drop_linear(in_features, out_features=hidden_size):
            # Every linear map in this module is bias-free and shares the
            # same weight-dropout rate.
            return nlpnn.WeightDropLinear(in_features,
                                          out_features,
                                          weight_dropout=dropout_p,
                                          bias=False)

        self.dropout = nn.Dropout(dropout_p)
        self.softmax = nn.Softmax(dim=-1)

        # Square projections; creation order is kept so that parameter
        # registration (and therefore state_dict layout) is unchanged.
        self.linear_question = weight_drop_linear(hidden_size)
        self.linear_video = weight_drop_linear(hidden_size)
        self.linear_v_question = weight_drop_linear(hidden_size)
        self.linear_v_video = weight_drop_linear(hidden_size)

        # Fusion stack: takes a 2 * hidden_size input and brings it back to
        # hidden_size.
        squeeze = weight_drop_linear(2 * hidden_size)
        refine = weight_drop_linear(hidden_size)
        self.linear_final = nn.Sequential(squeeze, nn.ReLU(), refine)

        norm_options = {"elementwise_affine": False}
        self.layer_norm_qv = nn.LayerNorm(hidden_size, **norm_options)
        self.layer_norm_vq = nn.LayerNorm(hidden_size, **norm_options)
示例#2
0
    def __init__(self,
                 hidden_size,
                 n_layers,
                 dropout_p,
                 vocab_size,
                 q_max_len,
                 v_max_len,
                 embedding=None,
                 update_embedding=True,
                 position=True):
        """Create the embedding, feature projections and positional encodings.

        Args:
            hidden_size: Output width of the projections, and width of the
                two layer norms and both positional encodings.
            n_layers: Stored unchanged on ``self.n_layers``.
            dropout_p: Probability for ``nn.Dropout``; also forwarded as
                ``weight_dropout`` to ``upcompress_embedding`` and
                ``project_resnet_and_c3d``.
            vocab_size: Number of rows in the embedding table.
            q_max_len: Second argument to the ``PositionalEncoding`` stored
                as ``q_pos_embedding`` (presumably the maximum question
                length -- confirm against ``PositionalEncoding``).
            v_max_len: Same, for ``v_pos_embedding``.
            embedding: Optional object exposing ``.shape``. Only
                ``shape[1]`` is read, to choose the embedding width; its
                values are NOT copied into the table.
            update_embedding: Assigned to ``requires_grad`` of the embedding
                weight.
            position: Stored unchanged on ``self.position``.
        """
        super().__init__()

        def weight_drop_linear(in_features, out_features):
            # Bias-free projection using the module-wide weight-dropout rate.
            return nlpnn.WeightDropLinear(in_features,
                                          out_features,
                                          weight_dropout=dropout_p,
                                          bias=False)

        self.dropout = nn.Dropout(p=dropout_p)
        self.ln_q = nn.LayerNorm(hidden_size, elementwise_affine=False)
        self.ln_v = nn.LayerNorm(hidden_size, elementwise_affine=False)
        self.n_layers = n_layers
        self.position = position

        # A supplied matrix only dictates the embedding width.
        if embedding is None:
            embedding_dim = hidden_size
        else:
            embedding_dim = embedding.shape[1]

        # NOTE: the table is deliberately left at its default
        # initialisation; copying the values of ``embedding`` into it is
        # intentionally disabled.
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
        self.embedding.weight.requires_grad = update_embedding

        # Maps embedding_dim to hidden_size.
        self.upcompress_embedding = weight_drop_linear(embedding_dim,
                                                       hidden_size)

        # NOTE(review): unlike the other projections, no weight_dropout is
        # passed here, so the library default applies -- confirm this is
        # intended.
        self.project_c3d = nlpnn.WeightDropLinear(4096, 2048, bias=False)
        self.project_resnet_and_c3d = weight_drop_linear(4096, hidden_size)

        # Original author's note: max_seq_len is 35 or 80.
        self.q_pos_embedding = PositionalEncoding(hidden_size, q_max_len)
        self.v_pos_embedding = PositionalEncoding(hidden_size, v_max_len)
示例#3
0
    def __init__(self, hidden_size, dropout_p=0.0):
        """Create the q/v/k projections, output projection and layer norm.

        Registers, in this order: a dropout layer, a last-axis softmax, three
        bias-free ``hidden_size -> hidden_size`` projections (``linear_q``,
        ``linear_v``, ``linear_k``), one more projection of the same shape
        (``linear_final``), and a ``LayerNorm`` without learnable affine
        parameters.

        Args:
            hidden_size: Input and output width of every projection and the
                normalised dimension of the layer norm.
            dropout_p: Probability given to ``nn.Dropout`` and forwarded as
                ``weight_dropout`` to every ``WeightDropLinear``.
        """
        super().__init__()

        def square_projection():
            # All four linear maps are identical in shape and configuration.
            return nlpnn.WeightDropLinear(hidden_size,
                                          hidden_size,
                                          weight_dropout=dropout_p,
                                          bias=False)

        self.dropout = nn.Dropout(dropout_p)
        self.softmax = nn.Softmax(dim=-1)

        # Creation order is kept (q, v, k, final) so that parameter
        # registration stays the same as before.
        self.linear_q = square_projection()
        self.linear_v = square_projection()
        self.linear_k = square_projection()

        self.linear_final = square_projection()

        self.layer_norm = nn.LayerNorm(hidden_size, elementwise_affine=False)