def __init__(self, n_words, input_size, hidden_size, bridge_type="mlp", dropout_rate=0.0):
    super(Decoder, self).__init__()

    self.bridge_type = bridge_type
    self.hidden_size = hidden_size
    self.context_size = hidden_size * 2

    self.embedding = Embeddings(num_embeddings=n_words,
                                embedding_dim=input_size,
                                dropout=0.0,
                                add_position_embedding=False)

    self.cgru_cell = CGRUCell(input_size=input_size, hidden_size=hidden_size)

    self.linear_input = nn.Linear(in_features=input_size, out_features=input_size)
    self.linear_hidden = nn.Linear(in_features=hidden_size, out_features=input_size)
    self.linear_ctx = nn.Linear(in_features=hidden_size * 2, out_features=input_size)

    self.dropout = nn.Dropout(dropout_rate)

    self._reset_parameters()
    self._build_bridge()
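# Hedged usage sketch (not from the source): constructing the DL4MT-style GRU decoder
# defined above. The vocabulary size and dimensions are illustrative assumptions, and
# the class Decoder plus its Embeddings/CGRUCell dependencies are assumed to be in scope.
dl4mt_decoder = Decoder(n_words=30000, input_size=512, hidden_size=1024,
                        bridge_type="mlp", dropout_rate=0.5)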
def __init__(self, n_src_vocab, n_layers=6, n_head=8,
             d_word_vec=512, d_model=512, d_inner_hid=1024, dropout=0.1,
             dim_per_head=None):
    super().__init__()

    self.num_layers = n_layers
    self.embeddings = Embeddings(num_embeddings=n_src_vocab,
                                 embedding_dim=d_word_vec,
                                 dropout=dropout,
                                 add_position_embedding=True)
    self.block_stack = nn.ModuleList([
        EncoderBlock(d_model=d_model, d_inner_hid=d_inner_hid, n_head=n_head,
                     dropout=dropout, dim_per_head=dim_per_head)
        for _ in range(n_layers)
    ])
    self.layer_norm = nn.LayerNorm(d_model)
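# Hedged sketch (assumption, not from the source): the enclosing class is assumed to be
# exposed as Encoder, and the vocabulary size is an illustrative value; the remaining
# arguments simply restate the constructor defaults.
transformer_encoder = Encoder(n_src_vocab=32000, n_layers=6, n_head=8,
                              d_word_vec=512, d_model=512, d_inner_hid=1024,
                              dropout=0.1)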
def __init__(self, vocab_size, embedding_size=300, hidden_size=512,
             num_layers=2, dropout=0.3, shared_weight=True, **kwargs):
    super().__init__()

    self.embedding = Embeddings(num_embeddings=vocab_size,
                                embedding_dim=embedding_size,
                                dropout=dropout)
    self.rnn = nn.LSTM(input_size=embedding_size,
                       hidden_size=hidden_size,
                       num_layers=num_layers,
                       bidirectional=False,
                       dropout=dropout,
                       batch_first=True)

    # output layer
    self.output = nn.Sequential(nn.Linear(hidden_size, hidden_size),
                                nn.Tanh(),
                                nn.Linear(hidden_size, embedding_size))

    # projection layer
    self.proj = nn.Linear(embedding_size, vocab_size, bias=False)

    if shared_weight:
        self.proj.weight = self.embedding.embeddings.weight
    else:
        my_init.default_init(self.proj.weight)
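# Hedged sketch (assumption, not from the source): with shared_weight=True the output
# projection reuses the input embedding matrix, so both attributes reference the same
# Parameter. The class name LM and the vocabulary size are illustrative assumptions.
lm = LM(vocab_size=20000, embedding_size=300, hidden_size=512,
        num_layers=2, dropout=0.3, shared_weight=True)
assert lm.proj.weight is lm.embedding.embeddings.weight  # tied weights, no extra parameters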
def __init__(self, n_tgt_vocab, n_layers=6, n_head=8,
             d_word_vec=512, d_model=512, d_inner_hid=1024, dropout=0.1):
    super(Decoder, self).__init__()

    self.n_head = n_head
    self.num_layers = n_layers
    self.d_model = d_model

    self.embeddings = Embeddings(n_tgt_vocab, d_word_vec,
                                 dropout=dropout,
                                 add_position_embedding=True)
    self.block_stack = nn.ModuleList([
        DecoderBlock(d_model=d_model, d_inner_hid=d_inner_hid,
                     n_head=n_head, dropout=dropout)
        for _ in range(n_layers)
    ])
    self.out_layer_norm = nn.LayerNorm(d_model)
def __init__(self, n_src_words, n_trg_words, d_word_vec, d_model,
             dropout=0.0, **kwargs):
    super(TransDiscriminator, self).__init__()

    # the embeddings are pre-trained and used without a dropout layer
    self.src_embedding = Embeddings(num_embeddings=n_src_words,
                                    embedding_dim=d_word_vec,
                                    dropout=dropout,
                                    add_position_embedding=False)
    self.trg_embedding = Embeddings(num_embeddings=n_trg_words,
                                    embedding_dim=d_word_vec,
                                    dropout=dropout,
                                    add_position_embedding=False)
    if not kwargs["update_embedding"]:
        for param in self.src_embedding.parameters():
            param.requires_grad = False
        for param in self.trg_embedding.parameters():
            param.requires_grad = False

    self.src_gru = RNN(type="gru", batch_first=True, input_size=d_word_vec,
                       hidden_size=d_model, bidirectional=True)
    self.trg_gru = RNN(type="gru", batch_first=True, input_size=d_word_vec,
                       hidden_size=d_model, bidirectional=True)

    # twice the bi-GRU output dimension (2 * 2 * d_model)
    self.layer_norm = nn.LayerNorm(d_model * 4, elementwise_affine=True)

    # classifies whether (x, y) is a translation pair
    self.ffn = nn.Linear(in_features=4 * d_model, out_features=2)
    self.dropout = nn.Dropout(dropout)
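# Hedged sketch (assumption, not from the source): building the discriminator with
# frozen pre-trained embeddings; the vocabulary sizes and dimensions are illustrative.
disc = TransDiscriminator(n_src_words=32000, n_trg_words=32000,
                          d_word_vec=512, d_model=256,
                          dropout=0.1, update_embedding=False)
# with update_embedding=False no gradient flows into either embedding table
assert all(not p.requires_grad for p in disc.src_embedding.parameters())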
def __init__(self, n_src_vocab, char_src_vocab=-1, n_layers=6, n_head=8,
             d_word_vec=512, d_model=512, d_inner_hid=1024, dropout=0.1,
             dim_per_head=None, padding_idx=PAD, positional_embedding="sin",
             layer_norm_first=True, ffn_activation="relu"):
    super().__init__()

    self.scale = d_word_vec ** 0.5
    self.num_layers = n_layers
    self.layer_norm_first = layer_norm_first

    self.embeddings = Embeddings(num_embeddings=n_src_vocab,
                                 embedding_dim=d_word_vec,
                                 dropout=dropout,
                                 positional_embedding=positional_embedding)
    self.char_embeddings = Embeddings(num_embeddings=char_src_vocab,
                                      embedding_dim=d_word_vec,
                                      dropout=dropout)
    self.layer_stack = nn.ModuleList([
        EncoderLayer(d_model=d_model, d_inner_hid=d_inner_hid, n_head=n_head,
                     dropout=dropout, dim_per_head=dim_per_head,
                     layer_norm_first=layer_norm_first,
                     ffn_activation=ffn_activation,
                     contain_char_attn=True)
        for _ in range(n_layers)
    ])
    self.layer_norm = nn.LayerNorm(d_model)
def __init__(self, n_words, input_size, hidden_size):
    super(Encoder, self).__init__()

    # Use PAD
    self.embeddings = Embeddings(num_embeddings=n_words,
                                 embedding_dim=input_size,
                                 dropout=0.0,
                                 add_position_embedding=False)
    self.gru = RNN(type="gru", batch_first=True, input_size=input_size,
                   hidden_size=hidden_size, bidirectional=True)
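# Hedged sketch (assumption, not from the source): the bidirectional GRU encoder above
# with illustrative sizes; since bidirectional=True, each position yields a context
# vector of size 2 * hidden_size.
rnn_encoder = Encoder(n_words=30000, input_size=512, hidden_size=1024)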
def __init__(self, n_words, input_size, hidden_size, dropout_rate=0.0):
    super(DisentangleRNNDecoder, self).__init__()

    self.hidden_size = hidden_size

    self.embeddings = Embeddings(num_embeddings=n_words,
                                 embedding_dim=input_size,
                                 dropout=0.0,
                                 add_position_embedding=False)
    self.cell = GRUAttnCell(input_size=input_size, hidden_size=hidden_size)
    self.linear_hidden = nn.Linear(in_features=hidden_size, out_features=input_size)
    # self.linear_ctx = nn.Linear(in_features=context_size, out_features=input_size)
    self.dropout = nn.Dropout(dropout_rate)
def __init__(self, n_tgt_vocab, n_layers=6, n_head=8,
             d_word_vec=512, d_model=512, d_inner_hid=1024,
             dim_per_head=None, dropout=0.1, positional_embedding="sin",
             layer_norm_first=True, padding_idx=PAD, ffn_activation="relu"):
    super(Decoder, self).__init__()

    self.n_head = n_head
    self.num_layers = n_layers
    self.d_model = d_model
    self.layer_norm_first = layer_norm_first

    self.embeddings = Embeddings(n_tgt_vocab, d_word_vec,
                                 dropout=dropout,
                                 positional_embedding=positional_embedding,
                                 padding_idx=padding_idx)
    self.layer_stack = nn.ModuleList([
        DecoderLayer(d_model=d_model, d_inner_hid=d_inner_hid, n_head=n_head,
                     dropout=dropout, dim_per_head=dim_per_head,
                     layer_norm_first=layer_norm_first,
                     ffn_activation=ffn_activation,
                     contain_char_attn=True)
        for _ in range(n_layers)
    ])
    self.layer_norm = nn.LayerNorm(d_model)
    self._dim_per_head = dim_per_head
def __init__(self, n_words, action_space=2, action_roll_steps=1,
             d_word_vec=512, d_model=256, dropout=0.0, **kwargs):
    super(Attacker, self).__init__()

    self.action_roll_steps = action_roll_steps
    self.action_space = action_space
    self.input_size = d_word_vec
    self.hidden_size = d_model

    self.src_embedding = Embeddings(num_embeddings=n_words,
                                    embedding_dim=self.input_size,
                                    dropout=dropout,
                                    add_position_embedding=False)
    # label representation
    self.src_gru = RNN(type="gru", batch_first=True,
                       input_size=self.input_size,
                       hidden_size=self.hidden_size,
                       bidirectional=True)

    # inputs: current input, avg_seqs as ctx
    self.ctx_linear = nn.Linear(in_features=2 * self.hidden_size,
                                out_features=self.hidden_size)
    self.input_linear = nn.Linear(in_features=self.input_size,
                                  out_features=self.hidden_size)
    # layer norm for inputs feature
    self.layer_norm = nn.LayerNorm(self.hidden_size, elementwise_affine=True)

    # outputs: actor distribution and critic value
    self.attacker_linear = nn.Linear(in_features=self.hidden_size,
                                     out_features=self.action_space)
    self.critic_linear = nn.Linear(in_features=self.hidden_size,
                                   out_features=1)
    self.dropout = nn.Dropout(dropout)
    self._reset_parameters()
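# Hedged sketch (assumption, not from the source): an attacker agent with a binary
# action space; the vocabulary size is illustrative and the other values restate the
# constructor defaults. attacker_linear scores the actions, critic_linear emits a
# single state-value estimate.
attacker = Attacker(n_words=30000, action_space=2, action_roll_steps=1,
                    d_word_vec=512, d_model=256, dropout=0.1)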
def build_translate_model(victim_config,
                          victim_model_path,
                          vocab_src,
                          vocab_trg,
                          device):
    """
    Build the translation env: the nmt_model without the *src* embedding,
    plus the corresponding (separated) embedding layers.

    :param victim_config: victim configs
    :param victim_model_path: victim model path
    :param vocab_src: source vocabulary
    :param vocab_trg: target vocabulary
    :param device: map location (cpu or cuda:*)
    :return: embedding layers and the nmt_model (without source embedding)
        used in the beam search
    """
    translate_model_configs = victim_config["model_configs"]
    src_emb = None
    trg_emb = None
    nmt_model = None

    # build model for translation (w/o src embedding)
    if translate_model_configs["model"] == "Transformer":
        src_emb = Embeddings(num_embeddings=vocab_src.max_n_words,
                             embedding_dim=translate_model_configs["d_word_vec"],
                             dropout=translate_model_configs["dropout"],
                             add_position_embedding=True)
        trg_emb = Embeddings(num_embeddings=vocab_trg.max_n_words,
                             embedding_dim=translate_model_configs["d_word_vec"],
                             dropout=translate_model_configs["dropout"],
                             add_position_embedding=True)
        nmt_model = TransformerTranslator(n_tgt_vocab=vocab_trg.max_n_words,
                                          **translate_model_configs)
    elif translate_model_configs["model"] == "DL4MT":
        src_emb = Embeddings(num_embeddings=vocab_src.max_n_words,
                             embedding_dim=translate_model_configs["d_word_vec"],
                             dropout=0.0,
                             add_position_embedding=False)
        trg_emb = Embeddings(num_embeddings=vocab_trg.max_n_words,
                             embedding_dim=translate_model_configs["d_word_vec"],
                             dropout=0.0,
                             add_position_embedding=False)
        nmt_model = Dl4mtTranslator(n_tgt_vocab=vocab_trg.max_n_words,
                                    **translate_model_configs)
    else:
        INFO("unregistered model type of victim in config")
        # fail fast instead of crashing later on src_emb = None
        raise ValueError("unregistered victim model type: %s"
                         % translate_model_configs["model"])

    src_emb.to(device)
    trg_emb.to(device)
    nmt_model.to(device)
    INFO("load params to device %s" % device)

    state_dict = load_translate_model(victim_model_path, map_location=device)
    _src_dict = dict()
    _trg_dict = dict()
    _model_dict = dict()
    for name, _ in nmt_model.state_dict().items():
        # when shared_proj is used, the Generator weight is a tensor object instead of an nn.Module
        if name in state_dict.keys():
            _model_dict[name] = state_dict[name]
        else:
            print("error, fail to locate %s for model in the state_dict" % name)
    for name, _ in src_emb.state_dict().items():
        if "encoder.embeddings." + name in state_dict.keys():
            _src_dict[name] = state_dict["encoder.embeddings." + name]
        else:
            print("error, fail to locate %s for src_emb in the state_dict" % name)
    for name, _ in trg_emb.state_dict().items():
        if "decoder.embeddings." + name in state_dict.keys():
            _trg_dict[name] = state_dict["decoder.embeddings." + name]
        else:
            print("error, fail to locate %s for trg_emb in the state_dict" % name)
    del state_dict

    src_emb.load_state_dict(_src_dict)
    trg_emb.load_state_dict(_trg_dict)
    nmt_model.load_state_dict(_model_dict)

    src_emb.eval()
    trg_emb.eval()
    nmt_model.eval()
    INFO("finished building translation model (w/o src embedding layer) for environment on %s" % device)
    return src_emb, trg_emb, nmt_model
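# Hedged usage sketch (assumption, not from the source): the config, checkpoint path,
# and vocabulary objects below are placeholders for whatever the surrounding script
# actually loads.
src_emb, trg_emb, nmt_model = build_translate_model(
    victim_config=victim_config,              # dict with a "model_configs" entry
    victim_model_path="path/to/victim.ckpt",  # hypothetical checkpoint path
    vocab_src=vocab_src,
    vocab_trg=vocab_trg,
    device="cuda:0")
# all three modules come back in eval() mode, ready for beam search in the environment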
def __init__(self, n_tgt_vocab, n_layers=6, n_head=8,
             capsule_type="output", routing_type="dynamic_routing",
             comb_type="ffn", dim_capsule=100, num_capsules=8,
             null_capsule=False, d_word_vec=512, d_model=512,
             d_inner_hid=1024, dim_per_head=None, dropout=0.1):
    super(Decoder, self).__init__()

    self.n_head = n_head
    self.num_layers = n_layers
    self.d_model = d_model

    self.embeddings = Embeddings(n_tgt_vocab, d_word_vec,
                                 dropout=dropout,
                                 add_position_embedding=True)
    self.block_stack = nn.ModuleList([
        DecoderBlock(d_model=d_model, d_inner_hid=d_inner_hid, n_head=n_head,
                     dropout=dropout, dim_per_head=dim_per_head,
                     dim_capsule=dim_capsule,
                     num_capsules=num_capsules if capsule_type.startswith("layer-wise") else 0,
                     null_capsule=null_capsule)
        for _ in range(n_layers)
    ])
    self.out_layer_norm = nn.LayerNorm(d_model)
    self._dim_per_head = dim_per_head

    # # contextual capsule layer
    # if capsule_type == "output":
    #     self.apply_output_capsule = True
    #     self.pre_capsule_layer_norm = nn.LayerNorm(d_model)
    #
    #     assert dim_capsule % num_capsules == 0
    #     self.dim_per_cap = dim_capsule // num_capsules
    #
    #     self.null_caps = null_capsule
    #     if null_capsule:
    #         INFO("Using Null Capsules to attract irrelevant routing.")
    #
    #     total_num_capsules = num_capsules if not self.null_caps else int(num_capsules * 1.5)
    #
    #     self.routing_type = routing_type
    #     if routing_type == "dynamic_routing":
    #         self.final_capsule_layer = ContextualCapsuleLayer(
    #             num_out_caps=total_num_capsules, num_in_caps=None,
    #             dim_in_caps=d_model,
    #             dim_out_caps=self.dim_per_cap,
    #             dim_context=d_model,
    #             num_iterations=3,
    #             share_route_weights_for_in_caps=True)
    #     elif routing_type == "EM_routing":
    #         self.final_capsule_layer = EMContextualCapsuleLayer(
    #             num_out_caps=total_num_capsules, num_in_caps=None,
    #             dim_in_caps=d_model, dim_out_caps=self.dim_per_cap, dim_context=d_model,
    #             num_iterations=3,
    #             share_route_weights_for_in_caps=True)
    #
    #     dim_per_part = dim_capsule // 2
    #     if comb_type == "ffn":
    #         self.out_and_cap_ffn = MultiInputPositionwiseFeedForward(
    #             size=d_model, hidden_size=d_inner_hid, dropout=dropout,
    #             inp_sizes=[dim_per_part, dim_per_part]
    #         )
    #     elif comb_type == "gate":
    #         self.out_and_cap_ffn = MultiInputGates(
    #             d_model=d_model, input_sizes=[dim_per_part, dim_per_part],
    #             dropout=dropout
    #         )
    # else:
    #     self.apply_output_capsule = False

    if capsule_type == "layer-wise-share":
        for i in range(1, n_layers):
            self.block_stack[i].capsule_layer = self.block_stack[0].capsule_layer
            self.block_stack[i].out_and_cap_ffn = self.block_stack[0].out_and_cap_ffn
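# Hedged sketch (assumption, not from the source): with capsule_type="layer-wise-share"
# every decoder block reuses the capsule layer and combination FFN of block 0, so the
# shared sub-modules are the same objects, not copies. The vocabulary size is illustrative.
capsule_decoder = Decoder(n_tgt_vocab=32000, capsule_type="layer-wise-share",
                          routing_type="dynamic_routing", comb_type="ffn")
assert capsule_decoder.block_stack[3].capsule_layer is capsule_decoder.block_stack[0].capsule_layer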