def construct(self, N_word, hidden_dim, num_layers, gpu, use_hs):
    self.value_pad_token = -10000
    self.q_lstm = PackedLSTM(input_size=N_word, hidden_size=hidden_dim // 2,
                             num_layers=num_layers, batch_first=True,
                             dropout=0.3, bidirectional=True)
    self.hs_lstm = PackedLSTM(input_size=N_word, hidden_size=hidden_dim // 2,
                              num_layers=num_layers, batch_first=True,
                              dropout=0.3, bidirectional=True)
    self.col_lstm = PackedLSTM(input_size=N_word, hidden_size=hidden_dim // 2,
                               num_layers=num_layers, batch_first=True,
                               dropout=0.3, bidirectional=True)

    # Number of tokens
    self.col_q_num = ConditionalAttention(hidden_dim=hidden_dim, use_bag_of_word=True)
    self.hs_q_num = ConditionalAttention(hidden_dim=hidden_dim, use_bag_of_word=True)
    self.tokens_num_out = nn.Sequential(nn.Tanh(), nn.Linear(hidden_dim, self.max_num_tokens))  # num of tokens: 1-6

    # Tokens
    self.col_q = ConditionalAttention(hidden_dim=hidden_dim, use_bag_of_word=False)
    self.hs_q = ConditionalAttention(hidden_dim=hidden_dim, use_bag_of_word=False)
    self.W_value = nn.Linear(in_features=hidden_dim, out_features=hidden_dim)
    self.value_out = nn.Sequential(nn.Tanh(), nn.Linear(hidden_dim, 1))

    pos_weight = torch.tensor(3).double()
    if gpu:
        pos_weight = pos_weight.cuda()
    self.bce_logit = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
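# A minimal sketch (not from the source) of how a pad constant like value_pad_token = -10000
# is typically used: padded positions in a score tensor are overwritten with the large
# negative constant so a subsequent softmax assigns them effectively zero weight.
# The tensor names and sizes below are illustrative assumptions, not the module's real API.
def _mask_pad_positions_example():
    import torch

    value_pad_token = -10000
    scores = torch.randn(2, 5)       # batch of 2 examples, up to 5 candidate positions each
    lengths = torch.tensor([3, 5])   # real number of candidates per example

    # positions at or beyond each example's length are padding
    mask = torch.arange(scores.size(1)).unsqueeze(0) >= lengths.unsqueeze(1)
    scores = scores.masked_fill(mask, value_pad_token)

    probs = torch.softmax(scores, dim=1)
    # padded positions now receive (numerically) zero probability
    return probs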
def construct(self, N_word, hidden_dim, num_layers, gpu, use_hs):
    self.q_lstm = PackedLSTM(input_size=N_word, hidden_size=hidden_dim // 2,
                             num_layers=num_layers, batch_first=True,
                             dropout=0.3, bidirectional=True)
    self.hs_lstm = PackedLSTM(input_size=N_word, hidden_size=hidden_dim // 2,
                              num_layers=num_layers, batch_first=True,
                              dropout=0.3, bidirectional=True)
    self.col_lstm = PackedLSTM(input_size=N_word, hidden_size=hidden_dim // 2,
                               num_layers=num_layers, batch_first=True,
                               dropout=0.3, bidirectional=True)

    self.q_cs = ConditionalAttention(hidden_dim=hidden_dim, use_bag_of_word=True)
    self.hs_cs = ConditionalAttention(hidden_dim=hidden_dim, use_bag_of_word=True)
    self.W_cs = nn.Linear(in_features=hidden_dim, out_features=hidden_dim)
    self.hv_out = nn.Sequential(nn.Tanh(), nn.Linear(hidden_dim, 2))
def construct(self, N_word, hidden_dim, num_layers, gpu, use_hs):
    self.q_lstm = PackedLSTM(input_size=N_word, hidden_size=hidden_dim // 2,
                             num_layers=num_layers, batch_first=True,
                             dropout=0.3, bidirectional=True)
    self.hs_lstm = PackedLSTM(input_size=N_word, hidden_size=hidden_dim // 2,
                              num_layers=num_layers, batch_first=True,
                              dropout=0.3, bidirectional=True)
    self.kw_lstm = PackedLSTM(input_size=N_word, hidden_size=hidden_dim // 2,
                              num_layers=num_layers, batch_first=True,
                              dropout=0.3, bidirectional=True)

    # Predict the number of keywords
    self.q_kw_num = ConditionalAttention(hidden_dim, use_bag_of_word=True)
    self.hs_kw_num = ConditionalAttention(hidden_dim, use_bag_of_word=True)
    self.kw_num_out = nn.Sequential(nn.Tanh(), nn.Linear(hidden_dim, 4))

    # Predict which keywords are used
    self.q_kw = ConditionalAttention(hidden_dim, use_bag_of_word=False)
    self.hs_kw = ConditionalAttention(hidden_dim, use_bag_of_word=False)
    self.W_kw = nn.Linear(in_features=hidden_dim, out_features=hidden_dim)
    self.kw_out = nn.Sequential(nn.Tanh(), nn.Linear(hidden_dim, 1))

    # Only move pos_weight to the GPU when requested
    pos_weight = 3 * torch.tensor(3).double()
    if gpu:
        pos_weight = pos_weight.cuda()
    self.bce_logit = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
def construct(self, N_word, hidden_dim, num_layers, gpu, use_hs):
    self.q_lstm = PackedLSTM(input_size=N_word, hidden_size=hidden_dim // 2,
                             num_layers=num_layers, batch_first=True,
                             dropout=0.3, bidirectional=True)
    self.hs_lstm = PackedLSTM(input_size=N_word, hidden_size=hidden_dim // 2,
                              num_layers=num_layers, batch_first=True,
                              dropout=0.3, bidirectional=True)
    self.bag_of_word = BagOfWord()
    self.W_q = nn.Linear(hidden_dim, hidden_dim)
    self.W_hs = nn.Linear(hidden_dim, hidden_dim)
    self.ao_out = nn.Sequential(nn.Tanh(), nn.Linear(hidden_dim, 2))
def construct(self, N_word, hidden_dim, num_layers, gpu, use_hs):
    self.col_pad_token = -10000
    self.q_lstm = PackedLSTM(input_size=N_word, hidden_size=hidden_dim // 2,
                             num_layers=num_layers, batch_first=True,
                             dropout=0.3, bidirectional=True)
    self.hs_lstm = PackedLSTM(input_size=N_word, hidden_size=hidden_dim // 2,
                              num_layers=num_layers, batch_first=True,
                              dropout=0.3, bidirectional=True)
    self.col_lstm = PackedLSTM(input_size=N_word, hidden_size=hidden_dim // 2,
                               num_layers=num_layers, batch_first=True,
                               dropout=0.3, bidirectional=True)

    # Predicting the number of columns
    self.q_col_num = ConditionalAttention(hidden_dim=hidden_dim, use_bag_of_word=True)
    self.hs_col_num = ConditionalAttention(hidden_dim=hidden_dim, use_bag_of_word=True)
    self.col_num_out = nn.Sequential(nn.Tanh(), nn.Linear(hidden_dim, self.num))  # num of cols: 1-6
    self.col_rep_out = nn.Sequential(nn.Tanh(), nn.Linear(hidden_dim, 4))  # num of repeats: 0-3

    # Columns
    self.q_col = ConditionalAttention(hidden_dim=hidden_dim, use_bag_of_word=False)
    self.hs_col = ConditionalAttention(hidden_dim=hidden_dim, use_bag_of_word=False)
    self.W_col = nn.Linear(in_features=hidden_dim, out_features=hidden_dim)
    self.col_out = nn.Sequential(nn.Tanh(), nn.Linear(hidden_dim, 1))

    pos_weight = torch.tensor(3).double()
    if gpu:
        pos_weight = pos_weight.cuda()
    self.bce_logit = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
    self.cosine_loss = nn.CosineEmbeddingLoss()
def construct(self, N_word, hidden_dim, num_layers, gpu, use_hs):
    self.q_lstm = PackedLSTM(input_size=N_word, hidden_size=hidden_dim // 2,
                             num_layers=num_layers, batch_first=True,
                             dropout=0.3, bidirectional=True)
    self.hs_lstm = PackedLSTM(input_size=N_word, hidden_size=hidden_dim // 2,
                              num_layers=num_layers, batch_first=True,
                              dropout=0.3, bidirectional=True)
    self.kw_lstm = PackedLSTM(input_size=N_word, hidden_size=hidden_dim // 2,
                              num_layers=num_layers, batch_first=True,
                              dropout=0.3, bidirectional=True)

    self.q_kw_num = ConditionalAttention(hidden_dim, use_bag_of_word=True)
    self.hs_kw_num = ConditionalAttention(hidden_dim, use_bag_of_word=True)
    self.kw_num_out = nn.Sequential(nn.Tanh(), nn.Linear(hidden_dim, 4))  # num of key words: 0-3

    self.q_kw = ConditionalAttention(hidden_dim, use_bag_of_word=False)
    self.hs_kw = ConditionalAttention(hidden_dim, use_bag_of_word=False)
    self.W_kw = nn.Linear(in_features=hidden_dim, out_features=hidden_dim)
    self.kw_out = nn.Sequential(nn.Tanh(), nn.Linear(hidden_dim, 1))

    # pos_weight balances positive against negative examples in BCEWithLogitsLoss:
    # e.g. with 1 positive for every 3 negative examples, pos_weight=3 makes the loss
    # behave as if the positives occurred 3 times as often.
    pos_weight = 3 * torch.tensor(3).double()
    if gpu:
        pos_weight = pos_weight.cuda()
    self.bce_logit = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
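# A minimal sketch (not part of the model) illustrating the pos_weight comment above:
# with pos_weight=3, each positive target contributes 3x to the BCE loss, which
# compensates for a dataset with roughly 3 negatives per positive. The logits and
# targets below are made-up values for illustration only.
def _pos_weight_example():
    import torch
    import torch.nn as nn

    logits = torch.tensor([0.2, -1.0, 0.7])
    targets = torch.tensor([1.0, 0.0, 1.0])

    plain = nn.BCEWithLogitsLoss()(logits, targets)
    weighted = nn.BCEWithLogitsLoss(pos_weight=torch.tensor(3.0))(logits, targets)
    # the weighted loss is larger because errors on positive targets are up-weighted
    return plain.item(), weighted.item()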