def __init__(self, rnn_type, bidirectional_encoder, num_layers,
             hidden_size, attn_type="general", attn_func="softmax",
             coverage_attn=False, context_gate=None,
             copy_attn=False, dropout=0.0, embeddings=None,
             reuse_copy_attn=False, copy_attn_type="general"):
    super(RNNDecoderBase, self).__init__(
        attentional=attn_type != "none" and attn_type is not None)

    self.bidirectional_encoder = bidirectional_encoder
    self.num_layers = num_layers
    self.hidden_size = hidden_size
    self.embeddings = embeddings
    self.dropout = nn.Dropout(dropout)

    # Decoder state
    self.state = {}

    # Build the RNN (e.g. an LSTM).
    self.rnn = self._build_rnn(rnn_type,
                               input_size=self._input_size,
                               hidden_size=hidden_size,
                               num_layers=num_layers,
                               dropout=dropout)

    # Set up the context gate.
    self.context_gate = None
    if context_gate is not None:
        self.context_gate = context_gate_factory(
            context_gate, self._input_size,
            hidden_size, hidden_size, hidden_size
        )

    # Set up the standard attention.
    self._coverage = coverage_attn
    if not self.attentional:
        if self._coverage:
            raise ValueError("Cannot use coverage term with no attention.")
        self.attn = None
    else:
        self.attn = GlobalAttention(
            hidden_size, coverage=coverage_attn,
            attn_type=attn_type, attn_func=attn_func
        )

    if copy_attn and not reuse_copy_attn:
        if copy_attn_type == "none" or copy_attn_type is None:
            raise ValueError(
                "Cannot use copy_attn with copy_attn_type none")
        self.copy_attn = GlobalAttention(
            hidden_size, attn_type=copy_attn_type, attn_func=attn_func
        )
    else:
        self.copy_attn = None

    self._reuse_copy_attn = reuse_copy_attn and copy_attn
    if self._reuse_copy_attn and not self.attentional:
        raise ValueError("Cannot reuse copy attention with no attention.")
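# Hedged sketch (not part of the variant above): why `bidirectional_encoder`
# is stored. In OpenNMT-py, RNNDecoderBase.init_state folds the encoder's two
# directions into one, roughly as in this stand-alone illustration;
# `fix_enc_hidden` is a hypothetical name for the internal helper.
import torch

def fix_enc_hidden(hidden):
    # (num_layers * 2, batch, dim) -> (num_layers, batch, 2 * dim)
    return torch.cat([hidden[0::2], hidden[1::2]], dim=2)

h = torch.randn(4, 8, 256)          # 2 layers x 2 directions, batch of 8
print(fix_enc_hidden(h).shape)      # torch.Size([2, 8, 512])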
def __init__(self, rnn_type, bidirectional_encoder, num_layers,
             hidden_size, attn_type="general", attn_func="softmax",
             coverage_attn=False, context_gate=None,
             copy_attn=False, dropout=0.0, embeddings=None,
             reuse_copy_attn=False):
    super(RNNDecoderBase, self).__init__()

    self.bidirectional_encoder = bidirectional_encoder
    self.num_layers = num_layers
    self.hidden_size = hidden_size
    self.embeddings = embeddings
    self.dropout = nn.Dropout(dropout)

    # Decoder state
    self.state = {}

    # Build the RNN.
    self.rnn = self._build_rnn(rnn_type,
                               input_size=self._input_size,
                               hidden_size=hidden_size,
                               num_layers=num_layers,
                               dropout=dropout)

    # Set up the context gate.
    self.context_gate = None
    if context_gate is not None:
        self.context_gate = context_gate_factory(
            context_gate, self._input_size,
            hidden_size, hidden_size, hidden_size)

    # Set up the standard attention.
    self._coverage = coverage_attn
    self.attn = GlobalAttention(hidden_size,
                                coverage=coverage_attn,
                                attn_type=attn_type,
                                attn_func=attn_func)

    if copy_attn and not reuse_copy_attn:
        self.copy_attn = GlobalAttention(hidden_size,
                                         attn_type=attn_type,
                                         attn_func=attn_func)
    else:
        self.copy_attn = None

    self._reuse_copy_attn = reuse_copy_attn and copy_attn
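# Hedged sketch: what context_gate_factory builds. OpenNMT-py's context gates
# follow Tu et al. (2017); the stand-alone module below mirrors the "both"
# gate variant for illustration only (TinyContextGate is a made-up name).
import torch
import torch.nn as nn

class TinyContextGate(nn.Module):
    def __init__(self, emb_size, dec_size, attn_size, out_size):
        super().__init__()
        self.gate = nn.Linear(emb_size + dec_size + attn_size, out_size)
        self.source_proj = nn.Linear(attn_size, out_size)
        self.target_proj = nn.Linear(emb_size + dec_size, out_size)

    def forward(self, prev_emb, dec_state, attn_ctx):
        # z in (0, 1) trades off source context against target history.
        z = torch.sigmoid(
            self.gate(torch.cat([prev_emb, dec_state, attn_ctx], dim=1)))
        source = self.source_proj(attn_ctx)
        target = self.target_proj(torch.cat([prev_emb, dec_state], dim=1))
        return torch.tanh(z * source + (1.0 - z) * target)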
def __init__(self, rnn_type, bidirectional_encoder, num_layers,
             hidden_size, attn_type="general", attn_func="softmax",
             coverage_attn=False, context_gate=None,
             copy_attn=False, dropout=0.0, embeddings=None,
             reuse_copy_attn=False, copy_attn_type="general",
             num_emotion_classes=0, emotion_emb_size=0,
             generic_vocab_indices=None, emotion_vocab_indices=None,
             eds_type=0, no_clf_loss=False, no_eds_attention=False):
    super(RNNDecoderBase, self).__init__(
        attentional=attn_type != "none" and attn_type is not None)

    self.bidirectional_encoder = bidirectional_encoder
    self.num_layers = num_layers
    self.hidden_size = hidden_size
    self.embeddings = embeddings
    self.dropout = nn.Dropout(dropout)
    self.embedding_size = self.embeddings.embedding_size
    self.vocab_size = self.embeddings.word_vocab_size
    self.eds_type = eds_type
    self.no_clf_loss = no_clf_loss
    self.no_eds_attention = no_eds_attention

    # Emotion embedding
    self.num_emotion_classes = num_emotion_classes
    self.emotion_emb_size = emotion_emb_size
    rnn_input_size = self._input_size
    if num_emotion_classes != 0 and emotion_emb_size != 0:
        self.emo_embedding = nn.Embedding(num_emotion_classes,
                                          emotion_emb_size)
        rnn_input_size += emotion_emb_size

    # EDS model
    self.generic_vocab_indices = None  # a 1D list
    self.emotion_vocab_indices = None  # a 2D list
    if generic_vocab_indices is not None:
        if not self.no_eds_attention:
            # one from word embedding and another from emotion embedding
            rnn_input_size *= 2
        self.all_vocab_indices = nn.Parameter(
            torch.arange(0, self.vocab_size, dtype=torch.long),
            requires_grad=False)
        self.generic_vocab_indices = nn.Parameter(
            torch.LongTensor(generic_vocab_indices), requires_grad=False)
        self.emotion_vocab_indices = nn.Parameter(
            torch.LongTensor(emotion_vocab_indices), requires_grad=False)
        self.generic_vocab_size = self.generic_vocab_indices.size(0)
        self.emotion_vocab_size = self.emotion_vocab_indices.size(1)
        self.num_emotions = self.emotion_vocab_indices.size(0)
        self.alpha = nn.Parameter(torch.zeros(hidden_size))
        self.beta = nn.Parameter(torch.zeros(hidden_size))
        self.gamma = nn.Parameter(torch.zeros(self.embedding_size))
        self.emotion_classifier = nn.Linear(self.embedding_size,
                                            self.num_emotions)
        self.generic_mask = nn.Parameter(torch.zeros(self.vocab_size),
                                         requires_grad=False)
        self.generic_mask[self.generic_vocab_indices] = 1
        # For each emotion, collect the emotion-word indices that belong
        # to all *other* emotions.
        other_emotion_indices = []
        flattened_emotion_vocab_indices = [
            i for e in emotion_vocab_indices for i in e]
        for i in range(len(emotion_vocab_indices)):
            other_emotion_indices.append(list(
                set(flattened_emotion_vocab_indices).difference(
                    set(emotion_vocab_indices[i]))))
        self.other_emotion_indices = nn.Parameter(
            torch.LongTensor(other_emotion_indices), requires_grad=False)
        self.all_emotion_indices = nn.Parameter(
            torch.LongTensor(list(set(flattened_emotion_vocab_indices))),
            requires_grad=False)
        # (vocab_size, emb_size)
        self.vocab_embedding = nn.Parameter(
            self.embeddings(
                self.all_vocab_indices.unsqueeze(0).unsqueeze(-1)
            ).squeeze(0),
            requires_grad=False)

    # Decoder state
    self.state = {}

    # Build the RNN.
    self.rnn = self._build_rnn(rnn_type,
                               input_size=rnn_input_size,
                               hidden_size=hidden_size,
                               num_layers=num_layers,
                               dropout=dropout)

    # Set up the context gate.
    self.context_gate = None
    if context_gate is not None:
        self.context_gate = context_gate_factory(
            context_gate, self._input_size,
            hidden_size, hidden_size, hidden_size
        )

    # Set up the standard attention.
    self._coverage = coverage_attn
    if not self.attentional:
        if self._coverage:
            raise ValueError("Cannot use coverage term with no attention.")
        self.attn = None
    else:
        self.attn = GlobalAttention(
            hidden_size, coverage=coverage_attn,
            attn_type=attn_type, attn_func=attn_func
        )

    if copy_attn and not reuse_copy_attn:
        if copy_attn_type == "none" or copy_attn_type is None:
            raise ValueError(
                "Cannot use copy_attn with copy_attn_type none")
        self.copy_attn = GlobalAttention(
            hidden_size, attn_type=copy_attn_type, attn_func=attn_func
        )
    else:
        self.copy_attn = None

    self._reuse_copy_attn = reuse_copy_attn and copy_attn
    if self._reuse_copy_attn and not self.attentional:
        raise ValueError("Cannot reuse copy attention with no attention.")
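# Hedged sketch (illustrative, not this fork's code): how the emotion
# embedding is presumably consumed at each decoding step -- the emotion
# vector is concatenated to the word embedding, which is why rnn_input_size
# is widened by emotion_emb_size above. All sizes here are made up.
import torch
import torch.nn as nn

emb_size, emo_size, hidden = 256, 64, 512
word_emb = torch.randn(1, 8, emb_size)             # (step, batch, emb)
emo_embedding = nn.Embedding(7, emo_size)          # 7 emotion classes
emo = emo_embedding(torch.zeros(1, 8, dtype=torch.long))
rnn = nn.GRU(emb_size + emo_size, hidden)
out, h = rnn(torch.cat([word_emb, emo], dim=-1))   # widened RNN input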
def __init__(self, rnn_type, bidirectional_encoder, num_layers,
             hidden_size, attn_type="general", attn_func="softmax",
             coverage_attn=False, context_gate=None,
             teacher_forcing="teacher", copy_attn=False, dropout=0.0,
             embeddings=None, reuse_copy_attn=False,
             copy_attn_type="general"):
    super(RNNDecoderBase, self).__init__(
        attentional=attn_type != "none" and attn_type is not None)

    self.bidirectional_encoder = bidirectional_encoder
    self.num_layers = num_layers
    self.hidden_size = hidden_size
    self.embeddings = embeddings
    self.dropout = nn.Dropout(dropout)
    self.teacher_forcing = teacher_forcing

    # Decoder state
    self.state = {}

    self.lin = nn.Linear(self.hidden_size, 100)  # This line!

    # Build the RNN.
    self.rnn = self._build_rnn(rnn_type,
                               input_size=self._input_size,
                               hidden_size=hidden_size,
                               num_layers=num_layers,
                               dropout=dropout)
    self.eval_status = False

    # Set up the context gate.
    self.context_gate = None
    if context_gate is not None:
        self.context_gate = context_gate_factory(
            context_gate, self._input_size,
            hidden_size, hidden_size, hidden_size)

    # Set up the standard attention.
    self._coverage = coverage_attn
    if not self.attentional:
        if self._coverage:
            raise ValueError("Cannot use coverage term with no attention.")
        self.attn = None
    else:
        self.attn = GlobalAttention(hidden_size,
                                    coverage=coverage_attn,
                                    attn_type=attn_type,
                                    attn_func=attn_func)

    if copy_attn and not reuse_copy_attn:
        if copy_attn_type == "none" or copy_attn_type is None:
            raise ValueError(
                "Cannot use copy_attn with copy_attn_type none")
        self.copy_attn = GlobalAttention(hidden_size,
                                         attn_type=copy_attn_type,
                                         attn_func=attn_func)
    else:
        self.copy_attn = None

    self.vocab_size = 0    # Only used by student-forcing, rand, and dist
    self.generator = None  # Only used by student-forcing, rand, and dist

    self._reuse_copy_attn = reuse_copy_attn and copy_attn
    if self._reuse_copy_attn and not self.attentional:
        raise ValueError("Cannot reuse copy attention with no attention.")
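# Hedged sketch (pure inference from the comments above, not this fork's
# code): the choice the `teacher_forcing` flag appears to control. Under
# "teacher" forcing the gold token feeds the next step; under student
# forcing the model's own prediction (via `self.generator`) does.
import torch

def next_input(mode, gold_tok, logits):
    if mode == "teacher":
        return gold_tok                # feed the reference token
    return logits.argmax(dim=-1)       # feed the model's own greedy guess

logits = torch.randn(8, 10000)         # (batch, vocab)
gold = torch.randint(0, 10000, (8,))
print(next_input("teacher", gold, logits).shape)  # torch.Size([8])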
def __init__(self, rnn_type, bidirectional_encoder, num_layers,
             hidden_size, attn_type="general", attn_func="softmax",
             coverage_attn=False, context_gate=None,
             copy_attn=False, dropout=0.0, embeddings=None,
             reuse_copy_attn=False, copy_attn_type="general",
             target_encoder_type=None, detach_target_encoder=False):
    super(RNNDecoderBase, self).__init__(
        attentional=attn_type != "none" and attn_type is not None)

    self.bidirectional_encoder = bidirectional_encoder
    self.num_layers = num_layers
    self.hidden_size = hidden_size
    self.embeddings = embeddings
    self.dropout = nn.Dropout(dropout)

    # Decoder state
    self.state = {}

    # @memray: hack to change the input size for target encoding
    self.input_size = self._input_size
    if target_encoder_type == 'none':
        target_encoder_type = None
    if target_encoder_type is not None:
        self.input_size += self.hidden_size

    # Build the RNN (note: hard-coded to a single layer).
    self.rnn = self._build_rnn(rnn_type,
                               input_size=self.input_size,
                               hidden_size=hidden_size,
                               num_layers=1,
                               dropout=dropout)

    # Set up the context gate.
    self.context_gate = None
    if context_gate is not None:
        self.context_gate = context_gate_factory(
            context_gate, self.input_size,
            hidden_size, hidden_size, hidden_size)

    # Set up the standard attention.
    self._coverage = coverage_attn
    if not self.attentional:
        if self._coverage:
            raise ValueError("Cannot use coverage term with no attention.")
        self.attn = None
    else:
        self.attn = GlobalAttention(hidden_size,
                                    coverage=coverage_attn,
                                    attn_type=attn_type,
                                    attn_func=attn_func)

    if copy_attn and not reuse_copy_attn:
        if copy_attn_type == "none" or copy_attn_type is None:
            raise ValueError(
                "Cannot use copy_attn with copy_attn_type none")
        self.copy_attn = GlobalAttention(hidden_size,
                                         attn_type=copy_attn_type,
                                         attn_func=attn_func)
    else:
        self.copy_attn = None

    self._reuse_copy_attn = reuse_copy_attn and copy_attn
    if self._reuse_copy_attn and not self.attentional:
        raise ValueError("Cannot reuse copy attention with no attention.")

    # @memray: build the target encoder; its output is fed to the decoder
    # as auxiliary input.
    self.target_encoder_type = target_encoder_type
    self.target_encoder = None
    if target_encoder_type == 'rnn':
        self.target_encoder = self._build_rnn(
            "GRU",
            input_size=self.embeddings.embedding_size,
            hidden_size=hidden_size,
            num_layers=1,
            dropout=dropout)
    self.detach_target_encoder = detach_target_encoder
    self.bilinear_layer = nn.Bilinear(in1_features=hidden_size,
                                      in2_features=hidden_size,
                                      out_features=1)
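# Hedged sketch (illustrative names, not this fork's code): why
# self.input_size grows by hidden_size when a target encoder is enabled --
# the target encoder's final state is concatenated onto every decoder input
# step, and detach_target_encoder presumably blocks gradients through it.
import torch
import torch.nn as nn

emb, hid = 256, 512
dec_rnn = nn.GRU(emb + hid, hid)           # widened decoder input
tgt_encoder = nn.GRU(emb, hid)
tgt_steps = torch.randn(5, 8, emb)         # (len, batch, emb)
_, te_state = tgt_encoder(tgt_steps)       # (1, batch, hid)
aux = te_state.expand(5, -1, -1)           # broadcast over time steps
out, _ = dec_rnn(torch.cat([tgt_steps, aux.detach()], dim=-1))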
def __init__(self, rnn_type, bidirectional_encoder, num_layers,
             hidden_size, attn_type="general", attn_func="softmax",
             coverage_attn=False, context_gate=None,
             copy_attn=False, dropout=0.0, embeddings=None,
             reuse_copy_attn=False, copy_attn_type="general",
             num_srcs: int = 1):
    super().__init__(
        attentional=attn_type != "none" and attn_type is not None)

    self.bidirectional_encoder = bidirectional_encoder
    self.num_layers = num_layers
    self.hidden_size = hidden_size
    self.embeddings = embeddings
    self.dropout = nn.Dropout(dropout)
    self.num_srcs = num_srcs

    # Decoder state
    self.state = {}

    # Build the RNN.
    self.rnn = self._build_rnn(rnn_type,
                               input_size=self._input_size,
                               hidden_size=hidden_size,
                               num_layers=num_layers,
                               dropout=dropout)

    # Hidden-state merging: project the concatenated per-source states
    # back down to hidden_size.
    self.hidden_active_0 = nn.LeakyReLU()
    self.hidden_merge_0 = nn.Linear(
        in_features=self.hidden_size * self.num_srcs,
        out_features=self.hidden_size,
    )
    if rnn_type == "LSTM":
        # LSTMs also carry a cell state, which gets its own merge layer.
        self.hidden_active_1 = nn.LeakyReLU()
        self.hidden_merge_1 = nn.Linear(
            in_features=self.hidden_size * self.num_srcs,
            out_features=self.hidden_size,
        )

    # Set up the context gate.
    self.context_gate = None
    if context_gate is not None:
        self.context_gate = context_gate_factory(
            context_gate, self._input_size,
            hidden_size, hidden_size, hidden_size)

    # Set up the standard attention.
    assert not coverage_attn, "Coverage attention is not supported"
    self._coverage: bool = coverage_attn
    if not self.attentional:
        if self._coverage:
            raise ValueError("Cannot use coverage term with no attention.")
        self.ms_attn = None
    else:
        self.ms_attn = MultiSourceAPGlobalAttention(hidden_size,
                                                    coverage=coverage_attn,
                                                    attn_type=attn_type,
                                                    attn_func=attn_func)

    # Copy attention
    if copy_attn and not reuse_copy_attn:
        if copy_attn_type == "none" or copy_attn_type is None:
            raise ValueError(
                "Cannot use copy_attn with copy_attn_type none")
        self.copy_ms_attn = MultiSourceAPGlobalAttention(
            hidden_size, attn_type=copy_attn_type, attn_func=attn_func)
    else:
        self.copy_ms_attn = None

    self._reuse_copy_attn = reuse_copy_attn and copy_attn
    if self._reuse_copy_attn and not self.attentional:
        raise ValueError("Cannot reuse copy attention with no attention.")
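# Hedged sketch (pure inference from the layer shapes above): how
# hidden_merge_0 presumably combines the per-source encoder final states --
# concatenate along the feature dimension, project back to hidden_size,
# and pass through the LeakyReLU.
import torch
import torch.nn as nn

hid, num_srcs, layers, batch = 512, 2, 2, 8
merge = nn.Linear(hid * num_srcs, hid)
act = nn.LeakyReLU()
per_src = [torch.randn(layers, batch, hid) for _ in range(num_srcs)]
merged = act(merge(torch.cat(per_src, dim=-1)))   # (layers, batch, hid)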