Example #1
def __init__(self, input_size, hidden_size):
     super(Dl4mtEncoder, self).__init__()
     self.gru = RNN(type="gru",
                    batch_first=True,
                    input_size=input_size,
                    hidden_size=hidden_size,
                    bidirectional=True)
Example #2
 def __init__(self, generator, **config):
     super().__init__()
     self.generator = generator
     self.gru = RNN(type="gru",
                    batch_first=True,
                    input_size=config['d_model'],
                    hidden_size=config['d_model'])
     self.linear = nn.Linear(config['d_model'], config['d_word_vec'])
Example #3
    def __init__(self,
                 feature_size=768,
                 hidden_size=512,
                 dropout_rate=0.1,
                 **kwargs):

        super(QE_PAIR, self).__init__()

        # Use PAD
        self.gru = RNN(type="gru",
                       batch_first=True,
                       input_size=feature_size,
                       hidden_size=hidden_size,
                       bidirectional=True)
        self.lstm = RNN(type="lstm",
                        batch_first=True,
                        input_size=feature_size,
                        hidden_size=hidden_size,
                        bidirectional=True)
        self.lstm_src = RNN(type="lstm",
                            batch_first=True,
                            input_size=feature_size,
                            hidden_size=hidden_size,
                            bidirectional=True)
        self.lstm_mt = RNN(type="lstm",
                           batch_first=True,
                           input_size=feature_size,
                           hidden_size=hidden_size,
                           bidirectional=True)
        self.w = nn.Linear(2 * hidden_size, 1)
        my_init.default_init(self.w.weight)

        self.w_all = nn.Linear(2 * 2 * hidden_size, 1)
        my_init.default_init(self.w_all.weight)

        self.w_1 = nn.Linear(2 * hidden_size, 1)
        my_init.default_init(self.w_1.weight)

        self.w_2 = nn.Linear(2 * hidden_size, 1)
        my_init.default_init(self.w_2.weight)

        self.dropout = nn.Dropout(dropout_rate)
        self.sigmoid = nn.Sigmoid()
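
A note on the heads above: w, w_1, and w_2 each score a bidirectional feature of width 2 * hidden_size, while w_all scores the concatenation of two such features (hence 2 * 2 * hidden_size). Below is a minimal sketch of applying one head, assuming mean pooling over time; the pooling choice and the qe_score name are illustrative assumptions, not this repository's forward().

import torch

# Sketch (assumption): pool a bi-RNN output over time and score it with one of
# the linear heads above; the mean pooling is illustrative, not the repo's code.
def qe_score(rnn_out, w):
    pooled = rnn_out.mean(dim=1)     # [batch, 2 * hidden_size]
    return torch.sigmoid(w(pooled))  # quality score in (0, 1)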
Example #4
    def __init__(
        self,
        n_src_words,
        n_trg_words,
        d_word_vec,
        d_model,
        dropout=0.0,
        **kwargs,
    ):
        super(TransDiscriminator, self).__init__()
        # the embeddings are pre-trained and used without a dropout layer
        self.src_embedding = Embeddings(num_embeddings=n_src_words,
                                        embedding_dim=d_word_vec,
                                        dropout=dropout,
                                        add_position_embedding=False)
        self.trg_embedding = Embeddings(num_embeddings=n_trg_words,
                                        embedding_dim=d_word_vec,
                                        dropout=dropout,
                                        add_position_embedding=False)
        if not kwargs["update_embedding"]:
            for param in self.src_embedding.parameters():
                param.requires_grad = False
            for param in self.trg_embedding.parameters():
                param.requires_grad = False

        self.src_gru = RNN(type="gru",
                           batch_first=True,
                           input_size=d_word_vec,
                           hidden_size=d_model,
                           bidirectional=True)
        self.trg_gru = RNN(type="gru",
                           batch_first=True,
                           input_size=d_word_vec,
                           hidden_size=d_model,
                           bidirectional=True)
        # src and trg features concatenated: twice the bi-GRU output dimension (2 * 2 * d_model)
        self.layer_norm = nn.LayerNorm(d_model * 4, elementwise_affine=True)

        # classify whether (x, y) is a translation pair
        self.ffn = nn.Linear(in_features=4 * d_model, out_features=2)
        self.dropout = nn.Dropout(dropout)
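
For context on the d_model * 4 sizing: each bidirectional GRU emits 2 * d_model features per step, and concatenating pooled src and trg summaries gives 4 * d_model. Below is a minimal sketch with plain torch.nn.GRU standing in for the repo's RNN wrapper; the PairScorerSketch name, the mean pooling, and the forward wiring are assumptions for illustration, not the repository's actual forward().

import torch
import torch.nn as nn

# Sketch (assumption): how the d_model * 4 width arises. Each bi-GRU emits
# 2 * d_model features per step; pooled src and trg summaries are concatenated.
class PairScorerSketch(nn.Module):
    def __init__(self, d_word_vec=512, d_model=256):
        super().__init__()
        self.src_gru = nn.GRU(d_word_vec, d_model, batch_first=True,
                              bidirectional=True)
        self.trg_gru = nn.GRU(d_word_vec, d_model, batch_first=True,
                              bidirectional=True)
        self.layer_norm = nn.LayerNorm(d_model * 4)
        self.ffn = nn.Linear(4 * d_model, 2)

    def forward(self, src_emb, trg_emb):
        src_out, _ = self.src_gru(src_emb)  # [batch, src_len, 2 * d_model]
        trg_out, _ = self.trg_gru(trg_emb)  # [batch, trg_len, 2 * d_model]
        feats = torch.cat([src_out.mean(1), trg_out.mean(1)], dim=-1)
        return self.ffn(self.layer_norm(feats))  # [batch, 2] pair logits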
Example #5
    def __init__(self,
                 d_model, n_head,
                 feature_size=1024,
                 hidden_size=512,
                 dropout=0.0,
                 **kwargs
                 ):
        super(QE_ATTENTION, self).__init__()

        self.ctx_attn = MultiHeadedAttention(head_count=n_head, model_dim=d_model, dropout=dropout,
                                             dim_per_head=None)

        # Use PAD
        self.gru = RNN(type="gru", batch_first=True, input_size=feature_size, hidden_size=hidden_size,
                       bidirectional=True)
        self.lstm = RNN(type="lstm", batch_first=True, input_size=feature_size, hidden_size=hidden_size,
                        bidirectional=True)

        self.w = nn.Linear(2 * hidden_size, 1)
        my_init.default_init(self.w.weight)

        self.dropout = nn.Dropout(dropout)
        self.sigmoid = nn.Sigmoid()
Example #6
    def __init__(self, n_words, input_size, hidden_size):
        super(Encoder, self).__init__()

        # Use PAD
        self.embeddings = Embeddings(num_embeddings=n_words,
                                     embedding_dim=input_size,
                                     dropout=0.0,
                                     add_position_embedding=False)

        self.gru = RNN(type="gru",
                       batch_first=True,
                       input_size=input_size,
                       hidden_size=hidden_size,
                       bidirectional=True)
Example #7
 def __init__(self,
              victim_configs,
              victim_model_path,
              input_dim,
              d_model,
              dropout=0.0,
              **kwargs):
     super().__init__(victim_configs, victim_model_path, dropout)
     self.src_gru = RNN(type="gru",
                        batch_first=True,
                        input_size=input_dim,
                        hidden_size=d_model,
                        bidirectional=True)
     self.trg_gru = RNN(type="gru",
                        batch_first=True,
                        input_size=input_dim,
                        hidden_size=d_model,
                        bidirectional=True)
     self.dropout = nn.Dropout(dropout, inplace=True)
     self.layer_norm = nn.LayerNorm(d_model * 4, elementwise_affine=True)
     # single-layer binary classification head
     self.ffn = nn.Linear(in_features=4 * d_model, out_features=2)
     self._reset_parameters()
Example #8
    def __init__(self,
                 d_word_vec=512,
                 d_model=256,
                 limit_dist=0.1,
                 dropout=0.0,
                 reparam_noise=1e-6):
        super(Rephraser, self).__init__()
        self.input_size = d_word_vec
        self.action_dim = d_word_vec  # modification on embeddings
        self.hidden_size = d_model
        self.action_range = limit_dist  # action range
        self.reparam_noise = reparam_noise
        self.dropout_rate = dropout
        self.dropout = nn.Dropout(dropout)
        self.log_std_bound = [-5, 4]  # default log std bound

        # current sequence as ctx features
        self.src_gru = RNN(type="gru",
                           batch_first=True,
                           input_size=self.input_size,
                           hidden_size=self.hidden_size,
                           bidirectional=True)

        # linear layers for the per-step input features: current embeddings,
        # with the averaged sequence (avg_seqs) as context
        self.ctx_linear = nn.Linear(in_features=2 * self.hidden_size,
                                    out_features=self.hidden_size)
        self.input_linear = nn.Linear(in_features=self.input_size,
                                      out_features=self.hidden_size)

        # layer norm for inputs feature
        self.rephrase_LN = nn.LayerNorm(self.hidden_size,
                                        elementwise_affine=True)
        # outputs: actor policy distribution (Gaussian: mean and std)
        self.rephraser_linear_base_mu = nn.Linear(
            in_features=self.hidden_size, out_features=self.hidden_size)
        self.rephraser_linear_mu = nn.Linear(in_features=self.hidden_size,
                                             out_features=self.action_dim)
        self.rephraser_linear_base_log_sig = nn.Linear(
            in_features=self.hidden_size, out_features=self.hidden_size)
        self.rephraser_linear_log_sig = nn.Linear(in_features=self.hidden_size,
                                                  out_features=self.action_dim)
        # # intrinsic curiosity module
        # self.icm = IntrinsicPredictor(
        #     d_model=self.hidden_size, action_dim=self.action_dim, dropout=dropout)

        # initialize parameter
        self._reset_parameters()
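
The mu / log_sig heads above parameterize a Gaussian policy in the style of soft actor-critic: log-std clamped to log_std_bound, the sampled action squashed by tanh and scaled to action_range, with reparam_noise stabilizing the squash correction. Below is a minimal sketch of that standard sampling pattern, assumed rather than taken from this repository; sample_action is a hypothetical name.

import torch

# Sketch (assumption): SAC-style reparameterized sampling from the mu / log_sig
# heads above. Names and wiring are illustrative, not this repository's code.
def sample_action(mu, log_sig, action_range=0.1, reparam_noise=1e-6,
                  log_std_bound=(-5.0, 4.0)):
    log_sig = torch.clamp(log_sig, *log_std_bound)  # keep std in a sane range
    dist = torch.distributions.Normal(mu, log_sig.exp())
    pre_tanh = dist.rsample()                       # reparameterization trick
    action = torch.tanh(pre_tanh) * action_range    # bounded action
    # change-of-variables correction for the tanh squash, guarded by reparam_noise
    log_prob = dist.log_prob(pre_tanh) - torch.log(
        action_range * (1.0 - torch.tanh(pre_tanh) ** 2) + reparam_noise)
    return action, log_prob.sum(dim=-1)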
Example #9
    def __init__(self,
                 d_word_vec=512,
                 d_model=256,
                 limit_dist=0.1,
                 dropout=0.0,
                 reparam_noise=1e-6):
        super(CriticNet, self).__init__()
        self.input_size = d_word_vec
        self.hidden_size = d_model
        self.limit_dist = limit_dist
        self.reparam_noise = reparam_noise
        self.dropout = nn.Dropout(dropout)

        # current sequences as ctx
        self.src_gru = RNN(type="gru",
                           batch_first=True,
                           input_size=self.input_size,
                           hidden_size=self.hidden_size,
                           bidirectional=True)

        # Linear for input step features
        self.ctx_linear = nn.Linear(in_features=2 * self.hidden_size,
                                    out_features=self.hidden_size)
        self.input_linear = nn.Linear(in_features=self.input_size,
                                      out_features=self.hidden_size)
        self.action_linear = nn.Linear(in_features=self.input_size,
                                       out_features=self.hidden_size)
        self.critic_LN = nn.LayerNorm(self.hidden_size,
                                      elementwise_affine=True)

        # double-Q trick with 2 critics sharing input features;
        # Q(s_t, a_t) is taken as the smaller of the two estimates
        self.critic1_linear_base = nn.Linear(in_features=self.hidden_size,
                                             out_features=self.hidden_size)
        self.critic1_linear = nn.Linear(in_features=self.hidden_size,
                                        out_features=1)
        self.critic2_linear_base = nn.Linear(in_features=self.hidden_size,
                                             out_features=self.hidden_size)
        self.critic2_linear = nn.Linear(in_features=self.hidden_size,
                                        out_features=1)
        # initialize the parameter
        self._reset_parameters()
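
The double-Q comment refers to clipped double-Q learning: both critic heads score the same fused features, and the smaller estimate is used to curb value overestimation. A minimal sketch under that reading; the ReLU between base and head layers and the q_value name are assumptions.

import torch

# Sketch (assumption): clipped double-Q evaluation over the shared features.
# The ReLU between base and head layers is an assumption for illustration.
def q_value(feats, critic1_base, critic1, critic2_base, critic2):
    q1 = critic1(torch.relu(critic1_base(feats)))  # [batch, 1]
    q2 = critic2(torch.relu(critic2_base(feats)))  # [batch, 1]
    return torch.min(q1, q2)  # pessimistic estimate curbs overestimation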
Example #10
 def __init__(self,
              victim_configs,
              victim_model_path,
              trg_vocab_emb,
              input_dim,
              d_model,
              dropout=0.0,
              **kwargs):
     """
     :param victim_configs: build trg_emb from victim.
     :param victim_model_path: build trg_emb from victim.
     :param input_dim: word embedding dim
     :param d_model: encoding dimension
     :param dropout: redundant parameter in Annunciater base class
     :param sample_amount: save memory by scoring only sample_amount tokens from
         the vocab (larger is better); read from kwargs
     :param kwargs: provides sample_amount and, optionally, density_temperature
     """
     super().__init__(victim_configs, victim_model_path, dropout)
     # the perturbed embeddings should carry high-level features indicative of the targets
     self.trg_vocab_emb = trg_vocab_emb
     self.sample_amount = kwargs["sample_amount"]
     if "density_temperature" in kwargs:
         self.density_temperature = kwargs["density_temperature"]
     else:  # to scale the density
         self.density_temperature = self.sample_amount**0.5
     # src encoding
     self.src_gru = RNN(type="gru",
                        batch_first=True,
                        input_size=input_dim,
                        hidden_size=d_model,
                        bidirectional=True)
     self.LN = nn.LayerNorm(d_model * 2, elementwise_affine=True)
     # prediction layer for inner product similarity (density ratio)
     self.scorer_ffn = nn.Linear(in_features=2 * d_model,
                                 out_features=input_dim)
     # the "reference" for the density ratio, direct trg_emb or the victim enc representation of original src
     self._reset_parameters()  # init parameter
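
Here scorer_ffn projects the 2 * d_model encoding back to the embedding dimension so a density ratio can be read off as an inner product against reference embeddings, scaled by density_temperature (which defaults to sqrt(sample_amount)). A minimal sketch of that scoring, assuming plain dot-product similarity; the density_scores name is hypothetical and the exact normalization may differ in the repository.

import torch

# Sketch (assumption): temperature-scaled inner-product density scores against
# a sample of reference embeddings. Normalization details may differ in the repo.
def density_scores(scored_src, sampled_ref_emb, density_temperature):
    # scored_src:      [batch, input_dim]        (output of scorer_ffn)
    # sampled_ref_emb: [sample_amount, input_dim]
    logits = scored_src @ sampled_ref_emb.t()  # [batch, sample_amount]
    return logits / density_temperature        # scaled density ratios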
Example #11
    def __init__(self,
                 n_words,
                 action_space=2,
                 action_roll_steps=1,
                 d_word_vec=512,
                 d_model=256,
                 dropout=0.0,
                 **kwargs):
        super(Attacker, self).__init__()
        self.action_roll_steps = action_roll_steps
        self.action_space = action_space
        self.input_size = d_word_vec
        self.hidden_size = d_model
        self.src_embedding = Embeddings(num_embeddings=n_words,
                                        embedding_dim=self.input_size,
                                        dropout=dropout,
                                        add_position_embedding=False)
        # label representation
        self.src_gru = RNN(type="gru", batch_first=True, input_size=self.input_size,
                           hidden_size=self.hidden_size, bidirectional=True)

        # inputs: current input, avg_seqs as ctx
        self.ctx_linear = nn.Linear(in_features=2*self.hidden_size,
                                    out_features=self.hidden_size)
        self.input_linear = nn.Linear(in_features=self.input_size,
                                      out_features=self.hidden_size)
        # layer norm for inputs feature
        self.layer_norm = nn.LayerNorm(self.hidden_size, elementwise_affine=True)

        # outputs: actor distribution and critic value
        self.attacker_linear = nn.Linear(in_features=self.hidden_size,
                                         out_features=self.action_space)
        self.critic_linear = nn.Linear(in_features=self.hidden_size,
                                       out_features=1)
        self.dropout = nn.Dropout(dropout)

        self._reset_parameters()
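
The two heads above implement a standard actor-critic split over a shared per-step feature: a categorical policy over action_space actions and a scalar state-value estimate. A minimal sketch of reading out both heads; the softmax readout is the usual pattern, assumed here rather than taken from this repository, and actor_critic_readout is a hypothetical name.

import torch

# Sketch (assumption): reading out the actor-critic heads over a shared feature.
def actor_critic_readout(feature, attacker_linear, critic_linear):
    probs = torch.softmax(attacker_linear(feature), dim=-1)  # policy over actions
    value = critic_linear(feature).squeeze(-1)               # state-value estimate
    return probs, value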