def forward(self, s):
        x = self.preprocess(s)
        x = F.leaky_relu(self.bn1(self.conv1(x)))
        x = F.leaky_relu(self.bn2(self.conv2(x)))
        x = F.leaky_relu(self.bn3(self.conv3(x)))
        x = F.leaky_relu(self.bn4(self.conv4(x)))
        x = F.leaky_relu(self.bn5(self.conv5(x)))
        x = F.leaky_relu(self.bn6(self.conv6(x)))

        # x = x.view(x.size(0), -1)

        policy = F.leaky_relu(self.policy_bn(self.conv_policy(x))).view(
            x.size(0), -1)
        policy = self.policy_dropout(policy)
        # apply the extra functional dropout only in training mode rather than unconditionally
        policy = F.dropout(policy, p=0.3, training=self.training)
        policy = self.softmax(self.linear_policy(policy))

        value = F.leaky_relu(self.value_bn(self.conv_value(x))).view(
            x.size(0), -1)
        value = self.value_dropout(value)
        value = F.dropout(value, p=0.3, training=self.training)
        value = F.leaky_relu(self.fc_value(value))
        value = torch.tanh(self.linear_output(value))

        return policy, value
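The policy and value heads above mix a dropout module (self.policy_dropout / self.value_dropout) with an extra functional F.dropout call. A minimal sketch of the difference between the two styles, on a generic tensor (the class below is illustrative, not part of the model above):

import torch
import torch.nn as nn
import torch.nn.functional as F

class DropoutStyles(nn.Module):
    """Illustration only: module dropout vs. functional dropout."""
    def __init__(self, p=0.3):
        super().__init__()
        self.dropout = nn.Dropout(p)   # respects train()/eval() automatically
        self.p = p

    def forward(self, x):
        a = self.dropout(x)                                   # gated by module mode
        b = F.dropout(x, p=self.p, training=self.training)    # must be gated explicitly
        return a, b

m = DropoutStyles().eval()
a, b = m(torch.randn(2, 4))
assert torch.equal(a, b)   # both reduce to the identity in eval mode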
Example #2
    def forward(self, features, adj):
        x = self.conv1(features, adj)
        x = F.relu(x)
        x = F.dropout(x, self.dropout, self.training)
        x = self.conv2(x, adj)

        return F.log_softmax(x, dim=1)
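This forward assumes an enclosing two-layer GCN whose graph layers are called as layer(x, adj). A self-contained sketch of such a module, using a deliberately simple adj @ x @ W layer as a stand-in for whatever graph convolution the original code defines:

import torch
import torch.nn as nn
import torch.nn.functional as F

class GraphConvolution(nn.Module):
    """Stand-in graph layer: adj @ (x W). The real layer may differ."""
    def __init__(self, in_dim, out_dim):
        super().__init__()
        self.linear = nn.Linear(in_dim, out_dim, bias=False)

    def forward(self, x, adj):
        return adj @ self.linear(x)

class GCN(nn.Module):
    def __init__(self, in_dim, hidden_dim, num_classes, dropout=0.5):
        super().__init__()
        self.conv1 = GraphConvolution(in_dim, hidden_dim)
        self.conv2 = GraphConvolution(hidden_dim, num_classes)
        self.dropout = dropout

    def forward(self, features, adj):
        x = F.relu(self.conv1(features, adj))
        x = F.dropout(x, self.dropout, self.training)
        return F.log_softmax(self.conv2(x, adj), dim=1)

# usage: node features [N, in_dim] and a (normalized) adjacency matrix [N, N]
model = GCN(in_dim=16, hidden_dim=32, num_classes=7)
out = model(torch.randn(10, 16), torch.eye(10))   # out: [10, 7] log-probabilities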
Example #3
 def forward(self, x):
     x = F.relu(F.max_pool2d(self.conv1(x), 2))
     x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
     x = x.view(-1, 320)
     x = F.relu(self.fc1(x))
     x = F.dropout(x, training=self.training)
     x = self.fc2(x)
     return F.log_softmax(x, dim=1)
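The hard-coded x.view(-1, 320) pins down the layer shapes; this forward matches the classic MNIST convnet from the PyTorch examples, whose layers would look roughly like the sketch below (assumed, since the __init__ is not shown here):

import torch.nn as nn

class Net(nn.Module):
    """Layers consistent with the forward above, for 1x28x28 inputs."""
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)    # 28x28 -> 24x24, pooled to 12x12
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)   # 12x12 -> 8x8, pooled to 4x4
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)                   # 20 * 4 * 4 = 320
        self.fc2 = nn.Linear(50, 10)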
Example #4
 def forward(self, segfeats, seglens, wordfeats, wordmasks):
     x1 = self.v2s(segfeats, wordfeats, wordmasks)
     frames1, x1 = self.cross_gate(segfeats, x1)
     mmfeats = torch.cat([frames1, x1], -1)
     # wordfeats = self.bilinear(frames1, x1, F.relu)
     mmfeats = self.rnn(mmfeats, seglens, self.video_segment_num)
     mmfeats = F.dropout(mmfeats, self.dropout, self.training)
     return mmfeats
Example #5
    def forward(self, seg_feats, seglen):
        """
            seg_feats (tensor[B, seg, feat_dim])
            seglen (tensor[B])
        """
        seg_feats = F.dropout(seg_feats, self.dropout, self.training)
        seg_feats = seg_feats.transpose(0, 1)

        for attention in self.attn_layers:
            res = seg_feats
            seg_feats, _ = attention(seg_feats,
                                     seg_feats,
                                     seg_feats,
                                     None,
                                     attn_mask=self.self_attn_mask)
            seg_feats = F.dropout(seg_feats, self.dropout, self.training)
            seg_feats = res + seg_feats

        seg_feats = self.rnn(seg_feats, seglen, self.video_segment_num)
        seg_feats = F.dropout(seg_feats, self.dropout, self.training)

        seg_feats = seg_feats.transpose(0, 1)
        return seg_feats
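Each loop iteration above wraps self-attention in the same residual-plus-dropout pattern. A minimal sketch of that pattern using torch.nn.MultiheadAttention (the example's own attention layers take an extra positional argument, so this only illustrates the pattern, not the original class):

import torch
import torch.nn as nn
import torch.nn.functional as F

def residual_attn(attn, x, attn_mask=None, p=0.1, training=False):
    """One residual self-attention step on x of shape (seq, batch, dim)."""
    res = x
    x, _ = attn(x, x, x, attn_mask=attn_mask)    # self-attention: query = key = value
    x = F.dropout(x, p, training)
    return res + x

attn = nn.MultiheadAttention(embed_dim=64, num_heads=4)   # batch_first=False by default
x = torch.randn(8, 2, 64)                                 # (seq, batch, dim)
out = residual_attn(attn, x)                              # same shape as x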
Example #6
    def forward(self, x):
        
        # CNNs
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = x.view(x.size(0), -1)           # flatten the output of conv

        # FC layers
        x = F.relu(self.fc1(x))
        # x = F.dropout(x, p=self.drop_p, training=self.training)
        x = F.relu(self.fc2(x))
        x = F.dropout(x, p=self.drop_p, training=self.training)
        out = self.fc3(x)
            
        # swap time and sample dim such that (sample dim, time dim, CNN latent dim)
        # cnn_embed_seq: shape=(batch, time_step, input_size)

        return out
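conv1 through conv4 above are used as single callables, which suggests each one bundles convolution, normalization, activation, and pooling. A hedged sketch of one possible encoder definition; the channel counts, input resolution, and flattened size are assumptions, not taken from the original code:

import torch.nn as nn

def conv_block(in_ch, out_ch):
    """Conv + BatchNorm + ReLU + 2x pooling, callable like self.conv1 above."""
    return nn.Sequential(
        nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1),
        nn.BatchNorm2d(out_ch),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(2),
    )

class EncoderCNN(nn.Module):
    def __init__(self, num_classes=10, drop_p=0.3):
        super().__init__()
        self.drop_p = drop_p
        self.conv1 = conv_block(3, 32)
        self.conv2 = conv_block(32, 64)
        self.conv3 = conv_block(64, 128)
        self.conv4 = conv_block(128, 256)
        # assuming 3x64x64 inputs, four 2x poolings leave 4x4 maps: 256 * 4 * 4
        self.fc1 = nn.Linear(256 * 4 * 4, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, num_classes)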
Example #7
    def forward(self, frames, seglens, x, node_mask):
        """
        frames [B, seg, vdim] segfeats
        seglens [B]
        x [B, len, wdim] wordfeats
        node_mask [B, len] wordmasks
        """
        frames_len = frames.shape[1]
        # attentive
        x1_att, x2_att, _, _ = self.atten(frames, x, node_mask)
        x1_m, x2_m = x1_att, x2_att  # self.message_v(x1_att), self.message_s(x2_att)
        frames1 = self.update_v(x1_m, frames)
        x1 = self.update_s(x2_m, x)

        x1_m, _, a1, _ = self.intra_v(frames1, frames1, node_mask)
        x2_m, _, a2, _ = self.intra_s(x1, x1, node_mask)
        frames1 = self.update_v_intra(x1_m, frames1)
        x1 = self.update_s_intra(x2_m, x1)
        
        """
        Below is exactly what appeared in CSMGAN's official code.
        """
        #layer 2
        #x1_att, x2_att, a1, a2 = self.atten(frames1, x1, node_mask)
        #x1_m, x2_m = x1_att, x2_att#self.message_v(x1_att), self.message_s(x2_att)
        #frames1 = self.update_v(x1_m, frames1)
        #x1 = self.update_s(x2_m, x1)
        #x1_m, _, a1, _ = self.intra_v(frames1, frames1, node_mask)
        #x2_m, _, a2, _ = self.intra_s(x1, x1, node_mask)
        #frames1 = self.update_v_intra(x1_m, frames1)
        #x1 = self.update_s_intra(x2_m, x1)
        
        #frames1, x1 = frames, x
        #a1, a2 = 1, 1
        # interactive
        x1 = self.v2s(frames1, x1, node_mask)
        x = torch.cat([frames1, x1], -1)  # x1
        x = self.rnn(x, seglens, frames_len)
        x = F.dropout(x, self.dropout, self.training)
        
        return x
Example #8
 def forward(self, batch):
     """
     First composes the two input word vectors into one representation, which is then fed through a hidden
     layer with a ReLU and finally through an output layer that returns the raw weights for each class.
     :param batch: dict holding "w1" and "w2" (torch tensors with the representations of the two words) and
         "device" (the device to move them to)
     :return: the raw weights for each class
     """
     device = batch["device"]
     self._composed_phrase = self.compose(batch["w1"].to(device),
                                          batch["w2"].to(device),
                                          self.training)
     if self.add_single_words:
         w1_w2 = torch.cat((batch["w1"].to(device), batch["w2"].to(device)),
                           1)
         self._composed_phrase = torch.cat((w1_w2, self.composed_phrase), 1)
     hidden = F.relu(self.hidden(self.composed_phrase))
     hidden = F.dropout(hidden, p=self.dropout_rate, training=self.training)
     class_weights = self.output(hidden)
     return class_weights
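A self-contained sketch of a module that this forward would fit into, with a deliberately trivial compose step; the dimensions, the addition-based composition, and the class name are assumptions for illustration:

import torch
import torch.nn as nn
import torch.nn.functional as F

class PhraseClassifier(nn.Module):
    """Hedged sketch matching the forward above (names and sizes assumed)."""
    def __init__(self, emb_dim=300, hidden_dim=100, num_classes=3,
                 dropout_rate=0.2, add_single_words=False):
        super().__init__()
        self.add_single_words = add_single_words
        self.dropout_rate = dropout_rate
        composed_dim = emb_dim + (2 * emb_dim if add_single_words else 0)
        self.hidden = nn.Linear(composed_dim, hidden_dim)
        self.output = nn.Linear(hidden_dim, num_classes)

    def compose(self, w1, w2, training):
        return w1 + w2   # stand-in composition of the two word vectors

    def forward(self, batch):
        device = batch["device"]
        composed = self.compose(batch["w1"].to(device), batch["w2"].to(device),
                                self.training)
        if self.add_single_words:
            composed = torch.cat((batch["w1"].to(device),
                                  batch["w2"].to(device), composed), 1)
        hidden = F.relu(self.hidden(composed))
        hidden = F.dropout(hidden, p=self.dropout_rate, training=self.training)
        return self.output(hidden)

model = PhraseClassifier()
logits = model({"w1": torch.randn(8, 300), "w2": torch.randn(8, 300), "device": "cpu"})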
Example #9
    def forward(self, X, X_padding_mask=None, coverage=None, dropout=0.1):
        """
        K / key: (L, B, H) encoder_outputs, encoder feature
        V / value: (L, B, H) to calculate the context vector
        Q / query: (L, B, H) last_hidden, decoder feature
        X_padding_mask: (B, 1, L)
        coverage: (B, L)
        """
        X_dim = X.size(-1)
        X_query = X.transpose(0, 1)  # -> (B, L, H)
        X_key = X.transpose(0, 1)  # -> (B, L, H)
        X_value = X.transpose(0, 1)  # -> (B, L, H)

        scores = torch.matmul(X_query, X_key.transpose(-2, -1)) / math.sqrt(
            X_dim)  # (B, L, H) x (B, H, L) -> (B, L, L)

        attn_dist = F.softmax(scores, dim=-1)  # (B, L, L)
        attn_dist = F.dropout(attn_dist, p=dropout, training=self.training)
        context = torch.matmul(attn_dist,
                               X_value)  # (B, L, L) x (B, L, H) -> (B, L, H)

        # calculate average
        context = context.sum(1) / context.size(1)
        return context, attn_dist
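Shape-wise, this block takes X of shape (L, B, H), builds a (B, L, L) attention matrix, and returns one averaged context vector per batch element. A standalone sketch of the same computation, written as a plain function rather than the module method, for illustration:

import math
import torch
import torch.nn.functional as F

def averaged_self_attention(X, dropout=0.1, training=False):
    """X: (L, B, H) -> context (B, H), attn_dist (B, L, L)."""
    X_dim = X.size(-1)
    Xb = X.transpose(0, 1)                                              # (B, L, H)
    scores = torch.matmul(Xb, Xb.transpose(-2, -1)) / math.sqrt(X_dim)  # (B, L, L)
    attn_dist = F.dropout(F.softmax(scores, dim=-1), p=dropout, training=training)
    context = torch.matmul(attn_dist, Xb)                               # (B, L, H)
    return context.mean(1), attn_dist                                   # average over L

context, attn = averaged_self_attention(torch.randn(12, 4, 64))
assert context.shape == (4, 64) and attn.shape == (4, 12, 12)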
Example #10
    def forward(self,
                query,
                key,
                value,
                key_padding_mask=None,
                need_weights=True,
                attn_mask=None):
        """Input shape: Time x Batch x Channel

        Self-attention can be implemented by passing in the same arguments for
        query, key and value. Timesteps can be masked by supplying a T x T mask in the
        `attn_mask` argument. Padding elements can be excluded from
        the key by passing a binary ByteTensor (`key_padding_mask`) with shape:
        batch x src_len, where padding elements are indicated by 1s.
        """

        qkv_same = query.data_ptr() == key.data_ptr() == value.data_ptr()
        kv_same = key.data_ptr() == value.data_ptr()

        tgt_len, bsz, embed_dim = query.size()
        assert embed_dim == self.embed_dim
        assert list(query.size()) == [tgt_len, bsz, embed_dim]
        assert key.size() == value.size()

        saved_state = None

        if qkv_same:
            # self-attention
            q, k, v = self.in_proj_qkv(query)
        elif kv_same:
            # encoder-decoder attention
            q = self.in_proj_q(query)
            if key is None:
                assert value is None
                k = v = None
            else:
                k, v = self.in_proj_kv(key)
        else:
            q = self.in_proj_q(query)
            k = self.in_proj_k(key)
            v = self.in_proj_v(value)
        q = q * self.scaling

        if self.bias_k is not None:
            assert self.bias_v is not None
            k = torch.cat([k, self.bias_k.repeat(1, bsz, 1)])
            v = torch.cat([v, self.bias_v.repeat(1, bsz, 1)])
            if attn_mask is not None:
                attn_mask = torch.cat(
                    [attn_mask,
                     attn_mask.new_zeros(attn_mask.size(0), 1)],
                    dim=1)
            if key_padding_mask is not None:
                key_padding_mask = torch.cat([
                    key_padding_mask,
                    key_padding_mask.new_zeros(key_padding_mask.size(0), 1)
                ],
                                             dim=1)

        q = q.contiguous().view(tgt_len, bsz * self.num_heads,
                                self.head_dim).transpose(0, 1)
        if k is not None:
            k = k.contiguous().view(-1, bsz * self.num_heads,
                                    self.head_dim).transpose(0, 1)
        if v is not None:
            v = v.contiguous().view(-1, bsz * self.num_heads,
                                    self.head_dim).transpose(0, 1)

        # q:    bsz * num_heads, tgt_len, head_dim
        # k, v: bsz * num_heads, src_len, head_dim
        # key_padding_mask: bsz, src_len

        src_len = k.size(1)

        # This is part of a workaround to get around fork/join parallelism
        # not supporting Optional types.
        if key_padding_mask is not None and key_padding_mask.shape == torch.Size(
            []):
            key_padding_mask = None

        if key_padding_mask is not None:
            assert key_padding_mask.size(0) == bsz
            assert key_padding_mask.size(1) == src_len

        if self.add_zero_attn:
            src_len += 1
            k = torch.cat([k, k.new_zeros((k.size(0), 1) + k.size()[2:])],
                          dim=1)
            v = torch.cat([v, v.new_zeros((v.size(0), 1) + v.size()[2:])],
                          dim=1)
            if attn_mask is not None:
                attn_mask = torch.cat(
                    [attn_mask,
                     attn_mask.new_zeros(attn_mask.size(0), 1)],
                    dim=1)
            if key_padding_mask is not None:
                key_padding_mask = torch.cat([
                    key_padding_mask,
                    torch.zeros(key_padding_mask.size(0),
                                1).type_as(key_padding_mask)
                ],
                                             dim=1)

        attn_weights = torch.bmm(q, k.transpose(1, 2))
        assert list(
            attn_weights.size()) == [bsz * self.num_heads, tgt_len, src_len]

        # attn_weights: bsz * num_heads, tgt_len, src_len
        # attn_mask:    tgt_len, src_len
        if attn_mask is not None:
            attn_mask = attn_mask.unsqueeze(0)
            if self.onnx_trace:
                attn_mask = attn_mask.repeat(attn_weights.size(0), 1, 1)
            attn_weights += attn_mask

        if key_padding_mask is not None:
            # don't attend to padding symbols
            attn_weights = attn_weights.view(bsz, self.num_heads, tgt_len,
                                             src_len)
            if self.onnx_trace:
                attn_weights = torch.where(
                    key_padding_mask.unsqueeze(1).unsqueeze(2),
                    torch.Tensor([float("-Inf")]),
                    attn_weights.float()).type_as(attn_weights)
            else:
                attn_weights = attn_weights.float().masked_fill(
                    key_padding_mask.unsqueeze(1).unsqueeze(2),
                    float('-inf'),
                ).type_as(attn_weights)  # FP16 support: cast to float and back
            attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len,
                                             src_len)

        attn_weights = self.softmax(
            attn_weights,
            dim=-1,
            onnx_trace=self.onnx_trace,
        ).type_as(attn_weights)
        attn_weights = F.dropout(attn_weights,
                                 p=self.dropout,
                                 training=self.training)

        attn = torch.bmm(attn_weights, v)
        assert list(
            attn.size()) == [bsz * self.num_heads, tgt_len, self.head_dim]
        if self.onnx_trace and attn.size(1) == 1:
            # when ONNX tracing a single decoder step (sequence length == 1)
            # the transpose is a no-op copy before view, thus unnecessary
            attn = attn.contiguous().view(tgt_len, bsz, embed_dim)
        else:
            attn = attn.transpose(0,
                                  1).contiguous().view(tgt_len, bsz, embed_dim)
        attn = self.out_proj(attn)

        if need_weights:
            # average attention weights over heads
            attn_weights = attn_weights.view(bsz, self.num_heads, tgt_len,
                                             src_len)
            attn_weights = attn_weights.sum(dim=1) / self.num_heads
        else:
            attn_weights = None

        return attn, attn_weights
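The interface above mirrors torch.nn.MultiheadAttention (time-first inputs, an optional key_padding_mask marking padded key positions); a short usage sketch of the built-in module with the same conventions, for comparison:

import torch
import torch.nn as nn

mha = nn.MultiheadAttention(embed_dim=64, num_heads=8, dropout=0.1)
x = torch.randn(10, 2, 64)                       # (tgt_len, bsz, embed_dim), time-first
pad = torch.zeros(2, 10, dtype=torch.bool)       # (bsz, src_len); True marks padding
pad[:, 8:] = True                                # last two positions are padding
attn_out, attn_weights = mha(x, x, x, key_padding_mask=pad, need_weights=True)
# attn_out: (10, 2, 64); attn_weights: (2, 10, 10), averaged over heads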
Example #11
 def forward(self, input):
     if not self.freezed:
         # regular dropout while no mask has been frozen
         return F.dropout(input, self.p, self.training, self.inplace)
     else:
         # reuse the stored mask, replicated for every sample in the batch
         return input * torch.stack([self.mask] * input.size(0)).type(
             input.type())
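The frozen branch multiplies the input by a stored mask replicated over the batch. A hedged sketch of what the enclosing module might look like, assuming a freeze() method that samples and stores one inverted-dropout mask; the class name and freeze() are assumptions, not the original code:

import torch
import torch.nn as nn
import torch.nn.functional as F

class FreezableDropout(nn.Module):
    """Dropout whose mask can be frozen and reused across forward passes (sketch)."""
    def __init__(self, p=0.5, inplace=False):
        super().__init__()
        self.p = p
        self.inplace = inplace
        self.freezed = False
        self.mask = None

    def freeze(self, feature_shape):
        # sample one inverted-dropout mask (scaled by 1/(1-p)) and keep it fixed
        keep = torch.rand(feature_shape) >= self.p
        self.mask = keep.float() / (1.0 - self.p)
        self.freezed = True

    def forward(self, input):
        if not self.freezed:
            return F.dropout(input, self.p, self.training, self.inplace)
        return input * torch.stack([self.mask] * input.size(0)).type(input.type())

layer = FreezableDropout(p=0.3)
layer.freeze((16,))                 # fix one mask over 16 features
out = layer(torch.randn(4, 16))     # the same mask is applied to all 4 samples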