Пример #1
0
    def forward(self, x, z_input):
        """Attend from ``z_input`` over ``x`` and add the result back to ``z_input``.

        ``x`` and ``z_input`` are each layer-normalised first. The normalised
        ``z_input`` is passed as the first argument of ``self.attention`` and
        the normalised ``x`` as the second and third. The attended output then
        goes through dropout, ``linear1``, a second layer norm, ``linear2``,
        GELU, dropout and ``linear3`` before the residual addition with the
        un-normalised ``z_input``.
        """
        context = self.layer_norm_x(x)
        query = self.layer_norm_1(z_input)
        # Only the first element returned by the attention module is used.
        attended, _ = self.attention(query, context, context)

        hidden = self.linear1(self.dropout(attended))
        hidden = self.linear2(self.layer_norm_2(hidden))
        hidden = self.dropout(F.gelu(hidden))

        return self.linear3(hidden) + z_input
Пример #2
0
    def forward(self, x):
        """Run the contracting and expanding passes; return one output per level.

        Contracting pass: at every level after the first the running signal is
        average-pooled by 2. It is then reflect-padded by one sample on each
        side and passed through ``contract[2 * level]``, GELU and
        ``contract[2 * level + 1]``. The result is stored as a skip tensor and
        its GELU is accumulated into the running signal.

        Expanding pass: skip tensors are consumed in reverse order. At every
        level except the last, the output of ``expand[2 * level + 1]`` is split
        along dim 1 into a ``c_h``-channel part, which is fed back into the
        running signal before 2x interpolation, and the emitted output.

        Returns:
            list with ``self.n_layers`` tensors, in the order produced by the
            expanding pass.
        """
        depth = self.n_layers

        # ---- contracting pass ----
        running = x
        contracted = []
        for level in range(depth):
            if level > 0:
                running = F.avg_pool1d(running, 2, 2)
            feat = self.contract[2 * level](F.pad(running, [1, 1], 'reflect'))
            feat = self.contract[2 * level + 1](F.gelu(feat))
            contracted.append(feat)
            running = F.gelu(feat) if level == 0 else running + F.gelu(feat)

        # ---- expanding pass (last contracted level first) ----
        contracted.reverse()
        outputs = []
        for level in range(depth):
            feat = self.expand[2 * level](F.pad(running, [1, 1], 'reflect'))
            feat = self.expand[2 * level + 1](F.gelu(feat) + contracted[level])

            if level == depth - 1:
                # Final level: the whole tensor is the output, nothing is fed back.
                outputs.append(feat)
                continue

            out_channels = self.c_k + self.c_out * 2**(depth - level - 1)
            feat, out = feat.split([self.c_h, out_channels], dim=1)
            outputs.append(out)
            running = F.interpolate(running + F.gelu(feat), scale_factor=2)
        return outputs
Пример #3
0
    def forward(self,
                dense_x: torch.Tensor,
                dense_edge_index: torch.Tensor,
                batch: torch.Tensor,
                return_edge: bool = False):
        """Encode a batch of dense graphs along both edge directions.

        Args:
            dense_x: node features; the assertions require shape
                ``[batch_size, graph_size, self.embed_dim]``.
            dense_edge_index: per-graph edge endpoints; the assertions require
                shape ``[batch_size, graph_size, 2]``.
            batch: passed through to ``norm_out``, ``pooling_func`` and
                ``edge_extractor``.
            return_edge: when true, edge embeddings are computed as well.

        Returns:
            ``(tour_embeddings, dense_edge_embeddings)``; the second element is
            ``None`` unless ``return_edge`` is set, in which case it has shape
            ``[batch_size, graph_size, -1]``.
        """
        assert dense_x.dim() == dense_edge_index.dim()
        assert dense_x.size(0) == dense_edge_index.size(0)  # batch_size
        assert dense_x.size(1) == dense_edge_index.size(1)  # graph_size
        assert dense_x.size(2) == self.embed_dim
        assert dense_edge_index.size(2) == 2, f"{dense_edge_index.size()}"

        batch_size, graph_size = dense_x.size(0), dense_x.size(1)

        # Merge the batch and node dimensions into one flat node axis.
        flat_nodes = dense_x.flatten(0, 1)

        # Shift each graph's node ids so they index into the flat node axis.
        offsets = graph_size * torch.arange(batch_size, device=dense_x.device)
        forward_edges = (dense_edge_index +
                         offsets.view(-1, 1, 1)).flatten(0, 1).T  # [2, batch_size * graph_size]
        backward_edges = forward_edges.flipud()

        fwd_state = flat_nodes
        bwd_state = flat_nodes
        for _ in range(self.num_layers):
            fwd_state = checkpoint(self.gnn_layer, fwd_state, forward_edges)
            bwd_state = checkpoint(self.reversed_gnn_layer, bwd_state,
                                   backward_edges)

        solution = self.norm_out(F.gelu(fwd_state + bwd_state), batch)
        tour_embeddings = self.pooling_func(solution, batch)

        if not return_edge:
            return tour_embeddings, None

        edge_embeddings = self.edge_extractor(node_x=flat_nodes,
                                              solution_x=solution,
                                              edge_index=forward_edges,
                                              batch=batch)
        assert edge_embeddings.dim() == 2
        assert edge_embeddings.size(0) == batch_size * graph_size
        return tour_embeddings, edge_embeddings.reshape(
            batch_size, graph_size, -1)
    def forward(self, encoder_outputs, durations, frames_positions,
                input_lengths):
        """ Gaussian upsampling
        PARAMS
        ------
        encoder_outputs: Encoder outputs  [B, N, H]
        durations: phoneme durations  [B, N]
        frames_positions: Transformer-styled frames_positions [B, T, pos_embed]
        input_lengths: for text masks; masking is skipped when None
        RETURNS
        -------
        encoder_upsampling_outputs: upsampled encoder_output concatenated with
            frames_positions along the last dim  [B, T, H + pos_embed]
        """
        B = encoder_outputs.size(0)
        N = encoder_outputs.size(1)
        # Build helper tensors on the input's device instead of forcing CUDA,
        # so the module also runs on CPU or on a non-default GPU.
        device = encoder_outputs.device
        total_decoder_steps = frames_positions.size(1)

        # Centre of each token: cumulative duration minus half its own duration.
        c = torch.cumsum(durations, dim=1, dtype=torch.float) - 0.5 * durations
        c = c.unsqueeze(2)  # [B, N, 1]
        t = torch.arange(total_decoder_steps, device=device,
                         dtype=torch.float).expand(
                             B, N, total_decoder_steps)  # [B, N, T]

        # calculate range parameters using ConvNorm and GRU net
        self.range_parameter_layer.flatten_parameters()
        processed_durations = durations.float().unsqueeze(1)
        for duration_conv in self.duration_convs:
            processed_durations = F.dropout(
                F.gelu(duration_conv(processed_durations)), 0.5, self.training)

        range_parameters, _ = self.range_parameter_layer(
            torch.cat((encoder_outputs, processed_durations.transpose(1, 2)),
                      dim=2))
        var = F.softplus(self.range_dense(range_parameters))

        # Gaussian log-density of every frame position under every token.
        w_t = -0.5 * (np.log(2.0 * np.pi) + torch.log(var) +
                      torch.pow(t - c, 2) / var)
        if input_lengths is not None:
            input_masks = ~get_mask_from_lengths(input_lengths, N)  # [B, N]
            masks = input_masks.unsqueeze(2)
            # Out-of-place fill keeps the operation visible to autograd
            # (the previous `.data.masked_fill_` bypassed it).
            w_t = w_t.masked_fill(masks, self.mask_score)
        # Normalise over the token axis so each frame is a mixture of tokens.
        w_t = F.softmax(w_t, dim=1)
        encoder_upsampling_outputs = torch.bmm(w_t.transpose(
            1, 2), encoder_outputs)  # [B, T, encoder_hidden_size]
        encoder_upsampling_outputs = torch.cat(
            (encoder_upsampling_outputs, frames_positions), dim=2)

        return encoder_upsampling_outputs
Пример #5
0
    def forward(self, inputs, enc_outputs, lookahead_mask, padding_mask):
        """Apply self-attention, attention over ``enc_outputs``, then a two-layer feed-forward.

        Args:
            inputs: fed to ``multi_head_attention_1`` as its first three
                arguments.
            enc_outputs: fed to ``multi_head_attention_2`` as its second and
                third arguments.
            lookahead_mask: fourth argument of the first attention call.
            padding_mask: fourth argument of the second attention call.

        Returns:
            The output of ``norm_3``.
        """
        self_attended, _ = self.multi_head_attention_1(inputs, inputs, inputs,
                                                       lookahead_mask)
        self_block = self.norm_1(self.dropout_1(self_attended) + inputs)

        cross_attended, _ = self.multi_head_attention_2(
            self_block, enc_outputs, enc_outputs, padding_mask)
        cross_block = self.norm_2(self.dropout_2(cross_attended) + self_block)

        hidden = self.dense_2(F.gelu(self.dense_1(cross_block)))
        # NOTE(review): unlike the two attention sub-layers above, no residual
        # is added before norm_3 - confirm this is intentional.
        return self.norm_3(self.dropout_3(hidden))
Пример #6
0
    def forward(self, src: torch.FloatTensor, src_mask: torch.FloatTensor) -> torch.FloatTensor:
        """Apply a pre-norm self-attention sub-layer, then a pre-norm feed-forward sub-layer.

        Each sub-layer normalises its input, transforms it, applies dropout and
        adds the result back onto the un-normalised input.

        Args:
            src: layer input.
            src_mask: forwarded as the second argument of ``self.self_attn``.

        Returns:
            The input with both residual updates applied.
        """
        # Self-attention sub-layer.
        attended = self.self_attn(self.layer_norm1(src), src_mask)
        src = src + self.dropout1(attended)

        # Feed-forward sub-layer.
        hidden = F.gelu(self.intermediate_linear1(self.layer_norm2(src)))
        hidden = self.dropout(self.intermediate_linear2(hidden))
        return src + hidden
Пример #7
0
    def forward(self, src_seq, src_mask, return_attns=False):
        """Apply GELU to the input, run it through the layer stack and normalise.

        Args:
            src_seq: input tensor; GELU is applied to it before the first layer.
            src_mask: passed to every layer as ``slf_attn_mask``.
            return_attns: when true, also return the second output of every
                layer, in stack order.

        Returns:
            ``(enc_output, attn_list)`` if ``return_attns`` else the
            one-element tuple ``(enc_output,)``.
        """
        hidden = F.gelu(src_seq)

        attn_maps = []
        for layer in self.layer_stack:
            hidden, layer_attn = layer(hidden, slf_attn_mask=src_mask)
            if return_attns:
                attn_maps.append(layer_attn)

        hidden = self.layer_norm(hidden)
        return (hidden, attn_maps) if return_attns else (hidden,)
Пример #8
0
    def forward(self, input_tensor, seed, random=True):
        """Apply the two-layer feed-forward block chunk by chunk along dim 1.

        Args:
            input_tensor: annotated in the original as [batch, length, d_model];
                it is split into ``self.chunk`` pieces along dim 1.
            seed: base seed; chunk ``i`` is dropped out with ``seed + i`` when
                the module is in training mode.
            random: accepted but not used by this method.

        Returns:
            The per-chunk results concatenated back along dim 1.
        """
        # [batch, length // chunk, d_model] -> [batch, length // chunk, d_ff]
        hidden_chunks = []
        for piece in torch.chunk(input_tensor, chunks=self.chunk, dim=1):
            hidden_chunks.append(F.gelu(self.linear1(piece)))

        if self.training:
            hidden_chunks = [
                deterministic_dropout(piece, seed + offset, dropout=self.dropout)
                for offset, piece in enumerate(hidden_chunks)
            ]

        # [batch, length // chunk, d_ff] -> [batch, length, d_model]
        projected = [self.linear2(piece) for piece in hidden_chunks]
        return torch.cat(projected, dim=1)
Пример #9
0
    def forward(self, lv, ls):
        """Normalise, activate, optionally drop out, then convolve the lattice values.

        ``ls.set_values`` is called with the current values before the norm,
        before the convolution and, on the returned structure, with the
        convolution output.

        Returns:
            ``(lv_1, ls_1)`` as produced by ``self.conv``.
        """
        ls.set_values(lv)

        # Pre-activation ordering (norm, activation, conv), similar to densenet
        # and resnet: https://arxiv.org/pdf/1603.05027.pdf
        if self.norm is None:
            # Created on first use, once the channel count lv.shape[1] is known.
            self.norm = GroupNormLatticeModule(lv.shape[1])
        normed, ls = self.norm(lv, ls)
        activated = F.gelu(normed)
        if self.with_dropout:
            activated = self.drop(activated)
        ls.set_values(activated)

        lv_1, ls_1 = self.conv(activated, ls)
        ls_1.set_values(lv_1)
        return lv_1, ls_1
Пример #10
0
 def forward(self, G, out_key_one, out_key_two):
     """Embed every node type, run the graph layers and project two node types.

     Args:
         G: graph object exposing ``ntypes`` and ``nodes[ntype].data['id']``.
         out_key_one: first node type to project with ``self.out``.
         out_key_two: second node type to project with ``self.out``.

     Returns:
         Two dicts, one per requested node type, mapping each node's ``'id'``
         value (as a Python scalar) to its row of the projected output.
     """
     h = {
         ntype: F.gelu(self.adapt_ws[self.node_dict[ntype]](
             self.node_emb[ntype](G.nodes[ntype].data['id'])))
         for ntype in G.ntypes
     }
     for layer_idx in range(self.n_layers):
         h = self.gcs[layer_idx](G, h)

     def _rows_by_node_id(key):
         # Pair row i of the projection with the i-th stored node id.
         projected = self.out(h[key])
         node_ids = G.nodes[key].data['id']
         return {
             node_ids[row].item(): projected[row]
             for row in range(projected.shape[0])
         }

     return _rows_by_node_id(out_key_one), _rows_by_node_id(out_key_two)
Пример #11
0
    def forward(self, input, indices=None):
        """Apply the linear layer with a factor-specific additive weight update.

        The shared ``self.weight`` (after weight dropout) is optionally scaled
        element-wise by the summed outer products of the selected ``rm`` / ``sm``
        factors, passed through ``self.mfw_activation`` and then offset by the
        summed outer products of the selected ``r`` / ``s`` factors.

        :param input: T x B x H
        :param indices: 1-D tensor with exactly one element selecting the
            factor set; this is the only form currently implemented
        :return: ``F.linear(input, weight.t(), self.bias)`` with the adapted weight
        :raises NotImplementedError: if ``indices`` is ``None`` or not a
            one-element 1-D tensor, or if ``self.mfw_activation`` is not one of
            ``"none"``, ``"gelu"``, ``"silu"``
        """
        # Reject unsupported index forms up front; checking the rank first keeps
        # a 0-d tensor from failing inside ``size(0)``.
        if indices is None or indices.dim() != 1 or indices.size(0) != 1:
            indices_shape = None if indices is None else tuple(indices.size())
            raise NotImplementedError(
                "only a single-element 1-D `indices` tensor is supported; "
                f"got indices shape {indices_shape} for input shape "
                f"{tuple(input.size())}")

        weight_ = F.dropout(self.weight,
                            p=self.weight_drop,
                            training=self.training)

        r = torch.index_select(self.r, 0, indices).squeeze(0)
        s = torch.index_select(self.s, 0, indices).squeeze(0)

        if self.use_multiplicative:
            rm = torch.index_select(self.rm, 0, indices).squeeze(0)
            sm = torch.index_select(self.sm, 0, indices).squeeze(0)
            # Sum of per-row outer products, same shape as the weight.
            weight_ = weight_ * torch.sum(
                torch.bmm(rm.unsqueeze(-1), sm.unsqueeze(1)), dim=0)

        if self.mfw_activation == "gelu":
            weight_ = F.gelu(weight_)
        elif self.mfw_activation == "silu":
            weight_ = F.silu(weight_)
        elif self.mfw_activation != "none":
            raise NotImplementedError(
                f"unsupported mfw_activation: {self.mfw_activation!r}")

        # Additive update: sum of per-row outer products of r and s.
        weight_mask = torch.sum(torch.bmm(r.unsqueeze(-1), s.unsqueeze(1)),
                                dim=0)
        weight_ = weight_ + weight_mask

        return F.linear(input, weight_.t(), self.bias)
Пример #12
0
    def forward(self, x):
        """Run three conv + norm stages and add the shortcut of the input.

        The first stage is followed by GELU and the second by tanh. The third
        stage has no activation of its own: the shortcut branch is added to it
        in place and ReLU is applied to the sum.
        """
        hidden = self.conv1(x)
        hidden = F.gelu(self.bn1(hidden))

        hidden = self.conv2(hidden)
        hidden = F.tanh(self.bn2(hidden))

        hidden = self.bn3(self.conv3(hidden))
        hidden += self.shortcut(x)
        return F.relu(hidden)
Пример #13
0
    def forward(self, embedded, mask):
        """Apply a self-attention sub-layer, then a feed-forward sub-layer, each post-normalised.

        Both sub-layers add their dropped-out output to their input and then
        apply a layer norm. Per the original annotations, ``embedded`` is
        [batch size, seq len, emb dim] throughout.

        Args:
            embedded: passed to ``self.self_attn`` as its first three arguments.
            mask: fourth argument of ``self.self_attn``.
        """
        attended = self.self_attn(embedded, embedded, embedded, mask)
        embedded = self.layer_norm_1(embedded + self.dropout(attended))

        hidden = self.dropout(F.gelu(self.fc_1(embedded)))
        projected = self.dropout(self.fc_2(hidden))
        return self.layer_norm_2(embedded + projected)
Пример #14
0
 def update(self, aggr_out, node_inp, node_type):
     '''
         Step 3: Target-specific Aggregation
         x = W[node_type] * gelu(Agg(x)) + x

         Rows are processed one node type at a time: the type's own linear
         layer is applied to the activated aggregate and blended with the
         node input via sigmoid(self.skip[type]). Rows whose type is outside
         range(self.num_types) stay zero. Dropout is applied to the result.
     '''
     activated = F.gelu(aggr_out)
     res = torch.zeros(activated.size(0), self.out_dim, device=node_inp.device)
     for target_type in range(self.num_types):
         selected = (node_type == int(target_type)).reshape(-1)
         if not selected.any():
             continue
         # Skip connection with a learnable gate self.skip[target_type].
         alpha = torch.sigmoid(self.skip[target_type])
         transformed = self.a_linears[target_type](activated[selected])
         res[selected] = transformed * alpha + node_inp[selected] * (1 - alpha)
     return self.drop(res)
Пример #15
0
 def forward(self, x, hidden):
     """Run the conv front-end, a dense layer and the recurrent stack, then classify.

     Args:
         x: input to ``self.conv1``; the conv output must be 4-D because it is
             reshaped using its first four sizes.
         hidden: recurrent state, handed to ``self.gru_layers`` together with
             the features as a tuple.

     Returns:
         ``(out, hidden)``: classifier output and the state returned by
         ``self.gru_layers``.
     """
     feats = self.conv_layers(self.conv1(x))
     dims = feats.size()
     # Per the original annotations:
     # (batch, features*channel, time) -> (batch, time, features*channel)
     feats = feats.view(dims[0], dims[1] * dims[2], dims[3]).transpose(1, 2)
     feats = self.dropout(F.gelu(self.fully_connected(feats)))
     # The recurrent stack takes (features, state) as a single tuple argument.
     out, hidden = self.gru_layers((feats, hidden))
     return self.classifier(out), hidden
Пример #16
0
    def forward(self, self_attn, label_attn):
        """Fuse two attention outputs with learned, normalised gates.

        A sigmoid gate is computed for each input; the first is divided by the
        sum of both so the two weights add up to one. The weighted inputs are
        concatenated on the last dim and passed through ``fusion_linear``,
        dropout, ``ln``, GELU and ``out_linear``.

        Args:
            self_attn: annotated in the original as [batch, label, hidden].
            label_attn: annotated in the original as [batch, label, hidden].

        Returns:
            Output of ``out_linear`` with a trailing size-1 dim squeezed away.
        """
        gate_self = torch.sigmoid(self.linear_weight1(self_attn))
        gate_label = torch.sigmoid(self.linear_weight2(label_attn))
        weight_self = gate_self / (gate_self + gate_label)
        weight_label = 1 - weight_self

        fused = torch.cat((weight_self * self_attn, weight_label * label_attn),
                          dim=-1)

        fused = self.ln(self.dropout(self.fusion_linear(fused)))
        scores = self.out_linear(F.gelu(fused))
        return scores.squeeze(-1)
Пример #17
0
    def transform(self, hidden_states):
        """Apply the prediction-head transform: dense layer, GELU, layer norm.

        Parameters are looked up on ``self`` under their dotted names with
        every ``.`` replaced by ``__``. The layer norm is called with an
        epsilon of 1e-12.
        """
        def _param(dotted_name):
            # Attributes are stored with '.' replaced by '__'.
            return getattr(self, dotted_name.replace('.', '__'))

        dense_out = linear(
            hidden_states,
            _param('cls.predictions.transform.dense.weight'),
            _param('cls.predictions.transform.dense.bias'))

        activated = F.gelu(dense_out)

        return layer_norm(
            _param('cls.predictions.transform.LayerNorm.weight'),
            _param('cls.predictions.transform.LayerNorm.bias'),
            activated, 1e-12)
 def forward(self, src, src_mask=None, src_key_padding_mask=None):
     r"""Pass the input through the encoder layer.

     Args:
         src: the sequence to the encoder layer (required).
         src_mask: the mask for the src sequence (optional); accepted but not
             used by this method.
         src_key_padding_mask: the mask for the src keys per batch (optional).

     Returns:
         A 6-tuple: the encoded sequence followed by the queries, keys, slot
         vectors, slot assignment scores and slot attention scores returned by
         ``self.self_attn``.
     """
     (scaled_values, queries, keys, slot_vectors, slot_assignment_scores,
      slot_attention_scores) = self.self_attn(
          src, src, src, key_padding_mask=src_key_padding_mask)

     # Attention sub-layer: residual, then norm.
     attended = self.norm1(src + self.dropout1(scaled_values))

     # Feed-forward sub-layer: residual, then norm.
     hidden = self.dropout(F.gelu(self.linear1(attended)))
     encoded = self.norm2(attended + self.dropout2(self.linear2(hidden)))

     return (encoded, queries, keys, slot_vectors, slot_assignment_scores,
             slot_attention_scores)
Пример #19
0
    def forward(self, lv, ls):
        """Normalise, activate and linearly project the lattice values.

        On the first call the norm and linear modules are created from the
        channel count ``lv.shape[1]``. The linear layer is placed on the same
        device as ``lv`` instead of being forced onto ``"cuda"``, so the module
        also works on CPU or on a non-default GPU.

        Returns:
            ``(lv, ls)``: the projected values and the lattice structure, on
            which ``set_values`` has been called with those values.
        """
        ls.set_values(lv)

        # Pre-activation ordering (norm, activation, linear), similar to
        # densenet and resnet: https://arxiv.org/pdf/1603.05027.pdf
        if self.norm is None:
            self.norm = GroupNormLatticeModule(lv.shape[1])
            self.linear = torch.nn.Linear(lv.shape[1],
                                          self.out_channels,
                                          bias=self.use_bias).to(lv.device)
            with torch.no_grad():
                torch.nn.init.kaiming_normal_(self.linear.weight,
                                              mode='fan_in',
                                              nonlinearity='relu')

        lv, ls = self.norm(lv, ls)
        lv = F.gelu(lv)
        ls.set_values(lv)
        lv = self.linear(lv)
        ls.set_values(lv)
        return lv, ls
Пример #20
0
    def forward(self, x, mask):
        """Encode with BERT and the BiLSTM, then predict from the final state.

        Args:
            x: 2-D input (its size is unpacked as ``N, S``), passed to
                ``self.BERT``.
            mask: passed to ``self.BERT`` as ``attention_mask`` and to
                ``self.BiLSTM`` as its second argument.

        Returns:
            ``(binary_prob, classes_prob)``: the sigmoid of ``linear2`` and the
            raw output of ``linear3``, both computed from the same
            GELU-activated ``linear1`` features.
        """
        batch_size, seq_len = x.size()

        # Only the first of the three BERT outputs is used here.
        token_states, _, _ = self.BERT(x, attention_mask=mask)

        _, final_state = self.BiLSTM(self.dropout(token_states), mask)

        # Kept from the original: the reshaped mask is not used below, but the
        # view still requires the mask to hold batch_size * seq_len elements.
        mask = mask.view(batch_size, seq_len, 1)

        intermediate = F.gelu(self.linear1(final_state))
        binary_prob = T.sigmoid(self.linear2(intermediate))
        return binary_prob, self.linear3(intermediate)
Пример #21
0
def test_gelu_activation(N=50):
    """Check the GELU activation against ``torch.nn.functional.gelu`` on random inputs.

    Each trial draws a 1 x n_dims matrix from ``random_stochastic_matrix`` and
    verifies that the exact GELU matches PyTorch (rtol=1e-3) and that the
    approximate GELU is almost equal to the exact one.

    Args:
        N: number of random trials; ``None`` runs indefinitely.
    """
    from numpy_ml.neural_nets.activations import GELU

    N = np.inf if N is None else N

    # Loop-invariant objects are built once instead of on every trial.
    mine = GELU(approximate=False)
    mine_approx = GELU(approximate=True)

    def gold(z):
        # Reference implementation from PyTorch.
        return F.gelu(torch.FloatTensor(z)).numpy()

    i = 0
    while i < N:
        n_dims = np.random.randint(1, 100)
        z = random_stochastic_matrix(1, n_dims)

        np.testing.assert_allclose(mine.fn(z), gold(z), rtol=1e-3)
        assert_almost_equal(mine.fn(z), mine_approx.fn(z))

        print("PASSED")
        i += 1
Пример #22
0
    def forward(self, lv, ls, concat_connection=None):
        """Normalise, activate and coarsen the lattice values.

        Args:
            lv: lattice values; ``lv.shape[1]`` sizes the lazily created norm.
            ls: lattice structure; ``set_values`` is called on it with the
                current values before the norm and before the coarsening.
            concat_connection: optional tensor concatenated to the coarsened
                values along dim 1.

        Returns:
            ``(lv_1, ls_1)``: the coarsened (and optionally concatenated)
            values and the structure returned by ``self.coarse``.
        """
        ls.set_values(lv)

        # Pre-activation ordering (norm, activation, conv), similar to densenet
        # and resnet.
        if self.norm is None:
            self.norm = GroupNormLatticeModule(lv.shape[1])
        normed, ls = self.norm(lv, ls)
        activated = F.gelu(normed)
        ls.set_values(activated)

        lv_1, ls_1 = self.coarse(activated, ls)
        ls_1.set_values(lv_1)

        if concat_connection is None:
            return lv_1, ls_1

        lv_1 = torch.cat((lv_1, concat_connection), 1)
        ls_1.set_values(lv_1)
        return lv_1, ls_1
Пример #23
0
        def forward_ref(self, input, mask):
            """Reference forward pass using a precomputed flat dropout mask.

            Each layer applies ``F.linear`` with its own weight and bias. Every
            layer except the last is followed by GELU, multiplication with its
            slice of ``mask`` (reshaped to ``[rows, -1]``) and rescaling by
            ``1 / (1 - self.dropout)``. The read position in ``mask`` advances
            by the layer output's element count after every layer, including
            the last.
            """
            offset = 0
            hidden = input
            for layer_idx in range(self.num_layers):
                hidden = F.linear(hidden, self.weights[layer_idx],
                                  self.biases[layer_idx])

                layer_mask = mask[offset:offset + hidden.numel()]
                keep_scale = 1 / (1 - self.dropout)
                is_last = layer_idx >= self.num_layers - 1
                if not is_last:
                    layer_mask = layer_mask.view(hidden.size(0), -1)
                    hidden = F.gelu(hidden) * layer_mask * keep_scale

                offset += hidden.numel()

            return hidden
Пример #24
0
    def forward(self, x, mask):
        """Fuse the last BERT layers, encode with the BiLSTM and pool with masked attention.

        The hidden width and the number of fused layers are read from the BERT
        outputs instead of being hard-coded as 768 and 6, so encoders with a
        different hidden size work as well. The fusion module is still expected
        to return features of the same width as the BERT hidden states.

        Args:
            x: 2-D input (its size is unpacked as ``N, S``), passed to
                ``self.BERT``.
            mask: ``[N, S]`` mask; positions equal to 0 are excluded from the
                attention pooling.

        Returns:
            ``(binary_prob, classes_prob)``: the sigmoid of ``linear2`` and the
            raw output of ``linear3``, both computed from the same
            GELU-activated ``linear1`` features.
        """
        N, S = x.size()

        _, _, all_hidden_states = self.BERT(x, attention_mask=mask)

        # Stack the last (up to six) layers per token: [N*S, n_fused, d_model].
        fused_inputs = all_hidden_states[-6:]
        n_fused = len(fused_inputs)
        d_model = fused_inputs[0].size(-1)
        concated_layers = T.cat(fused_inputs, dim=-1).view(N * S, n_fused,
                                                           d_model)

        _, fused_layers = self.layer_fusion(concated_layers)
        fused_layers = self.dropout(fused_layers.view(N, S, d_model))

        encoder_states, _ = self.BiLSTM(fused_layers, mask)

        # Attention mechanism: additive mask puts neg_inf on masked positions.
        attention_mask = T.where(
            mask.view(N, S, 1) == 0.0, self.neg_inf, self.zeros).view(N, S)

        encoder_states = encoder_states.view(N * S, 2 * self.hidden_size)
        attn_scores = self.linear_attn_2(
            T.tanh(self.linear_attn_1(encoder_states))).view(N, S)
        attn_scores = F.softmax(attn_scores + attention_mask,
                                dim=-1).view(N, S, 1)

        encoder_states = encoder_states.view(N, S, 2 * self.hidden_size)
        context_vector = T.sum(attn_scores * encoder_states, dim=1)

        intermediate = F.gelu(self.linear1(context_vector))
        binary_prob = T.sigmoid(self.linear2(intermediate))
        classes_prob = self.linear3(intermediate)

        return binary_prob, classes_prob
Пример #25
0
    def forward(self, xyz1, xyz2, points1, points2):
        """
        Propagate features from the sampled points back to the dense points.

        Input:
            xyz1: dense point positions; unpacked here as [B, N, C]
                (the in-method permute to that layout is commented out)
            xyz2: sampled point positions; unpacked here as [B, S, C]
            points1: dense point features, [B, D, N], or None
            points2: sampled point features, [B, D, S]
        Return:
            new_points: upsampled points data, [B, D', N]
        """
        coarse_feats = points2.permute(0, 2, 1)
        batch, n_dense, _ = xyz1.shape
        _, n_coarse, _ = xyz2.shape

        if n_coarse == 1:
            # A single sampled point: broadcast its features to every dense point.
            interpolated = coarse_feats.repeat(1, n_dense, 1)
        else:
            # Inverse-distance weighting over the three nearest sampled points.
            sorted_dists, sorted_idx = square_distance(xyz1, xyz2).sort(dim=-1)
            nearest_dists = sorted_dists[:, :, :3]  # [B, N, 3]
            nearest_idx = sorted_idx[:, :, :3]

            inv_dist = 1.0 / (nearest_dists + 1e-8)
            weights = inv_dist / torch.sum(inv_dist, dim=2, keepdim=True)
            neighbours = index_points(coarse_feats, nearest_idx)
            interpolated = torch.sum(
                neighbours * weights.view(batch, n_dense, 3, 1), dim=2)

        if points1 is None:
            stacked = interpolated
        else:
            stacked = torch.cat([points1.permute(0, 2, 1), interpolated],
                                dim=-1)

        feats = stacked.permute(0, 2, 1)
        for layer_idx, conv in enumerate(self.mlp_convs):
            feats = F.gelu(self.mlp_bns[layer_idx](conv(feats)))
        return feats
Пример #26
0
    def forward(self, batch):
        """
        Classify from the first BERT hidden state concatenated with meta features.

        :param batch: indexable with at least three entries:
            ``batch[0]`` input ids, ``batch[1]`` attention mask,
            ``batch[2]`` meta features (concatenated on the last dim)
        :return: one-element tuple ``(logits,)`` holding the output of
            ``self.classifier``
        """
        input_ids, input_mask, meta_features = batch[0], batch[1], batch[2]

        bert_outputs = self.bert(input_ids=input_ids,
                                 attention_mask=input_mask)

        # First element of the BERT output, then position 0 along dim 1.
        first_state = bert_outputs[0][:, 0]

        features = torch.cat([first_state, meta_features], dim=-1)
        features = F.gelu(self.hidden(self.dropout(features)))

        return (self.classifier(features), )
Пример #27
0
    def forward(self, x):
        """Classify a point cloud with four conv + pool stages and an MLP head.

        Args:
            x: 3-D tensor unpacked as (bsize, feats, num_pts).

        The pooling stages are asked for num_pts // 4, // 8, // 16 and // 32
        points respectively. ``conv6`` and then ``conv5`` follow the last
        pooling stage. Global max and average pooling over the last dim are
        concatenated and fed to the three-layer head.

        Returns:
            Output of ``self.linear3``.
        """
        bsize, _, num_pts = x.size()

        stages = (
            (self.conv1, self.pool1, 4),
            (self.conv2, self.pool2, 8),
            (self.conv3, self.pool3, 16),
            (self.conv4, self.pool4, 32),
        )
        # The first conv receives the raw input as both arguments.
        feature = x
        for conv, pool, divisor in stages:
            feature = F.gelu(conv(x, feature))
            x, feature = pool(x, feature, num_pts // divisor)

        feature = F.gelu(self.conv6(x, feature))
        x = F.gelu(self.conv5(feature))

        global_max = F.adaptive_max_pool1d(x, 1).view(bsize, -1)
        global_avg = F.adaptive_avg_pool1d(x, 1).view(bsize, -1)
        pooled = torch.cat((global_max, global_avg), 1)

        hidden = self.dp1(F.gelu(self.bn6(self.linear1(pooled))))
        hidden = self.dp2(F.gelu(self.bn7(self.linear2(hidden))))
        return self.linear3(hidden)
Пример #28
0
    def forward(self, x, seed):
        """Apply the feed-forward block on chunks folded into the batch dimension.

        In training mode, dropout is drawn from a generator seeded with
        ``seed``, so the same seed reproduces the same mask. The generator is
        created on ``output.device``; the previous ``output.get_device()``
        returns -1 for CPU tensors, which is not a valid generator device.

        Args:
            x: annotated in the original as [batch, length, d_model]; dim 1 is
                split into ``self.chunk`` pieces.
            seed: integer seed for the dropout generator.

        Returns:
            Tensor with the chunks unfolded again, [batch, length, d_model].
        """
        # [batch, length, d_model] -> [batch * chunk, length // chunk, d_model]
        x = x.reshape(-1, x.size(1) // self.chunk, x.size(2))
        # [batch * chunk, length // chunk, d_ff]
        output = F.gelu(self.linear1(x))
        if self.training:
            generator = torch.Generator(device=output.device)
            generator.manual_seed(seed)
            keep_prob = 1 - self.dropout
            # With a scalar p, `output` only provides shape, dtype and device.
            dropout_mask = torch.bernoulli(output,
                                           p=keep_prob,
                                           generator=generator)
            output = dropout_mask * output / keep_prob

        output = self.linear2(output)
        # [batch * chunk, length // chunk, d_model] -> [batch, length, d_model]
        output = output.reshape(-1,
                                output.size(1) * self.chunk, output.size(2))
        return output
Пример #29
0
    def forward(self, x):
        """Embed, self-attend, mean-pool over dim 0 and decode with an MLP.

        Args:
            x: 2-D tensor; entries equal to 0 are treated as padding and
                handed to every attention layer as ``key_padding_mask``. It is
                transposed before embedding (annotated in the original as
                [seq, batch]).

        Returns:
            Output of ``self.output_layer``.
        """
        # Zero out the attention of empty sequence elements.
        padding_mask = x.eq(0).clone().detach()

        hidden = self.embedding(x.transpose(1, 0).int())  # [seq, batch]
        hidden = self.positionalEncoder(hidden)

        for layer_idx, attention in enumerate(self.self_attn_layers):
            # Only the first element of the attention output is used.
            attended = attention(hidden, hidden, hidden,
                                 key_padding_mask=padding_mask)[0]
            hidden = self.encoder_linear[layer_idx](attended)

        pooled = hidden.mean(dim=0)  # mean aggregation
        for layer_idx, layer in enumerate(self.decoder_layers):
            pooled = self.decoder_dropouts[layer_idx](F.gelu(layer(pooled)))

        return self.output_layer(pooled)
Пример #30
0
    def forward(self, state, ir_state, action):
        """Return two Q-value estimates for the concatenated state/action input.

        ``state``, ``ir_state`` and ``action`` are concatenated along dim 1.
        The first head runs ``f1``-``f3`` with GELU; the second runs
        ``l1``-``l3`` with GELU. Both heads then go through ``f4`` with SELU
        and ``f5``.

        Returns:
            ``(q1, q2)``
        """
        sa = torch.cat([state, ir_state, action], 1)

        q1 = sa
        for layer in (self.f1, self.f2, self.f3):
            q1 = F.gelu(layer(q1))
        q1 = self.f5(F.selu(self.f4(q1)))

        q2 = sa
        for layer in (self.l1, self.l2, self.l3):
            q2 = F.gelu(layer(q2))
        # NOTE(review): the second head reuses self.f4 / self.f5 from the first
        # head rather than layers of its own - confirm this sharing is intended.
        q2 = self.f5(F.selu(self.f4(q2)))

        return q1, q2