def __init__(self, config, evidence_lambda=0.8, freeze_predictor: bool = False):
    super(BertQAYesnoHierarchicalTopKfp32, self).__init__(config)
    print(f'The model {self.__class__.__name__} is loading...')
    print(f'The coefficient of evidence loss is {evidence_lambda}')

    layers.set_seq_dropout(True)
    layers.set_my_dropout_prob(config.hidden_dropout_prob)

    self.bert = BertModel(config)
    # self.dropout = nn.Dropout(config.hidden_dropout_prob)
    # self.answer_choice = nn.Linear(config.hidden_size, 2)

    self.doc_sen_self_attn = layers.LinearSelfAttnAllennlp(config.hidden_size)
    self.que_self_attn = layers.LinearSelfAttn(config.hidden_size)

    self.word_similarity = layers.AttentionScore(config.hidden_size, 250, do_similarity=False)
    self.vector_similarity = layers.AttentionScore(config.hidden_size, 250, do_similarity=False)

    # self.yesno_predictor = nn.Linear(config.hidden_size, 2)
    self.yesno_predictor = nn.Linear(config.hidden_size * 2, 3)
    self.evidence_lam = evidence_lambda

    if freeze_predictor:
        for param in self.yesno_predictor.parameters():
            param.requires_grad = False
    self.freeze_predictor = freeze_predictor

    self.apply(self.init_bert_weights)
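
# --- Usage sketch (illustrative, not from the repository) ---
# Assuming the class inherits `from_pretrained` from pytorch-pretrained-bert's
# BertPreTrainedModel (suggested by `self.apply(self.init_bert_weights)` above),
# extra keyword arguments are forwarded to __init__. The checkpoint name below
# is illustrative only.
model = BertQAYesnoHierarchicalTopKfp32.from_pretrained(
    'bert-base-uncased',          # illustrative checkpoint name
    evidence_lambda=0.8,
    freeze_predictor=True,
)
# With freeze_predictor=True the yes/no head keeps requires_grad=False, so only
# the remaining parameters need to be handed to the optimizer.
trainable_params = [p for p in model.parameters() if p.requires_grad]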
def __init__(self, config, evidence_lambda=0.8, negative_lambda=1.0):
    super(BertQAYesnoHierarchicalNeg, self).__init__(config)
    print(f'The model {self.__class__.__name__} is loading...')
    print(f'The coefficient of evidence loss is {evidence_lambda}')

    layers.set_seq_dropout(True)
    layers.set_my_dropout_prob(config.hidden_dropout_prob)

    self.bert = BertModel(config)
    # self.dropout = nn.Dropout(config.hidden_dropout_prob)
    # self.answer_choice = nn.Linear(config.hidden_size, 2)

    self.doc_sen_self_attn = layers.LinearSelfAttnAllennlp(config.hidden_size)
    self.que_self_attn = layers.LinearSelfAttn(config.hidden_size)

    self.word_similarity = layers.AttentionScore(config.hidden_size, 250, do_similarity=False)
    self.vector_similarity = layers.AttentionScore(config.hidden_size, 250, do_similarity=False)

    # self.yesno_predictor = nn.Linear(config.hidden_size, 2)
    self.yesno_predictor = nn.Linear(config.hidden_size * 2, 3)
    self.evidence_lam = evidence_lambda
    self.negative_lam = negative_lambda

    self.apply(self.init_bert_weights)
def __init__(self, config, evidence_lambda=0.8, view_id=1, split_type=0):
    super(BertQAYesnoHierarchicalTwoViewTopK, self).__init__(config)
    print(f'The model {self.__class__.__name__} is loading...')
    print(f'The coefficient of evidence loss is {evidence_lambda}')
    print(f'The view id is {view_id}')

    # Partition the 768 BERT hidden dimensions into two views.
    total_dim = 768
    rank_list = list(range(total_dim))
    self.split_type = split_type
    if split_type == 0:
        # First half vs. second half of the dimensions.
        self.view_ranks = [rank_list[:int(.5 * total_dim)], rank_list[int(.5 * total_dim):]]
    elif split_type == 1:
        # Even-indexed vs. odd-indexed dimensions.
        self.view_ranks = [rank_list[::2], rank_list[1::2]]
    elif split_type == 2:
        # Fixed random shuffle, then split in half.
        random.seed(19970417)
        random.shuffle(rank_list)
        self.view_ranks = [rank_list[:int(.5 * total_dim)], rank_list[int(.5 * total_dim):]]
    elif split_type == 3:
        # A second fixed random shuffle, then split in half.
        random.seed(20190914)
        random.shuffle(rank_list)
        self.view_ranks = [rank_list[:int(.5 * total_dim)], rank_list[int(.5 * total_dim):]]
    else:
        raise ValueError(f'split type should be 0/1/2/3, but found {split_type}')
    # Show the first 20 dimensions assigned to the selected view.
    print(self.view_ranks[view_id][:20])

    layers.set_seq_dropout(True)
    layers.set_my_dropout_prob(config.hidden_dropout_prob)

    self.bert = BertModel(config)
    # self.dropout = nn.Dropout(config.hidden_dropout_prob)
    # self.answer_choice = nn.Linear(config.hidden_size, 2)

    # Each view works on half of the hidden dimensions.
    config.hidden_size = int(config.hidden_size / 2)
    self.doc_sen_self_attn = layers.LinearSelfAttnAllennlp(config.hidden_size)
    self.que_self_attn = layers.LinearSelfAttn(config.hidden_size)

    self.word_similarity = layers.AttentionScore(config.hidden_size, 250, do_similarity=False)
    self.vector_similarity = layers.AttentionScore(config.hidden_size, 250, do_similarity=False)

    # self.yesno_predictor = nn.Linear(config.hidden_size, 2)
    self.yesno_predictor = nn.Linear(config.hidden_size * 2, 3)
    self.evidence_lam = evidence_lambda
    self.view_id = view_id

    self.apply(self.init_bert_weights)
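
# --- Self-contained sketch of the four view-splitting strategies above ---
# This reproduces only the dimension-partition logic so it can be inspected in
# isolation; `total_dim` is shrunk to 8 for readability and the seeds mirror the
# constructor. It is a verification aid, not part of the model code.
import random

def make_view_ranks(total_dim, split_type):
    """Return the two dimension index lists for a given split strategy."""
    rank_list = list(range(total_dim))
    if split_type == 0:                       # first half / second half
        return [rank_list[:total_dim // 2], rank_list[total_dim // 2:]]
    if split_type == 1:                       # even indices / odd indices
        return [rank_list[::2], rank_list[1::2]]
    if split_type in (2, 3):                  # two fixed random shuffles
        random.seed(19970417 if split_type == 2 else 20190914)
        random.shuffle(rank_list)
        return [rank_list[:total_dim // 2], rank_list[total_dim // 2:]]
    raise ValueError(f'split type should be 0/1/2/3, but found {split_type}')

for t in range(4):
    print(t, make_view_ranks(8, t))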
def __init__(self, config, evidence_lambda=0.8, negative_lambda=1.0,
             add_entropy: bool = False, split_num: int = 3, split_index: int = 0):
    super(BertQAYesnoHierarchicalNegHalf, self).__init__(config)
    print(f'The model {self.__class__.__name__} is loading...')
    print(f'The coefficient of evidence loss is {evidence_lambda}')
    print(f'The coefficient of negative samples loss is {negative_lambda}')
    print(f'Add entropy loss: {add_entropy}')

    layers.set_seq_dropout(True)
    layers.set_my_dropout_prob(config.hidden_dropout_prob)

    self.bert = BertModel(config)
    # self.dropout = nn.Dropout(config.hidden_dropout_prob)
    # self.answer_choice = nn.Linear(config.hidden_size, 2)

    # Split the BERT hidden state into `split_num` slices and use only one of them.
    self.split_num = split_num
    self.split_size = config.hidden_size // self.split_num
    self.split_index = split_index
    # (start, end) ranges of each slice; the last slice absorbs any remainder.
    self.split_sizes = [((i - 1) * self.split_size, i * self.split_size)
                        for i in range(1, split_num)] \
                       + [((split_num - 1) * self.split_size, config.hidden_size)]
    print(f'Split BERT output into {self.split_num} slices.')
    print(f'The current model uses the {self.split_index}th hidden state slice.')
    print(f'Reading hidden states in range {self.split_sizes[self.split_index]}.')

    self.doc_sen_self_attn = layers.LinearSelfAttnAllennlp(self.split_size)
    self.que_self_attn = layers.LinearSelfAttn(self.split_size)

    self.word_similarity = layers.AttentionScore(self.split_size, 250, do_similarity=False)
    self.vector_similarity = layers.AttentionScore(self.split_size, 250, do_similarity=False)

    # self.yesno_predictor = nn.Linear(config.hidden_size, 2)
    self.yesno_predictor = nn.Linear(self.split_size * 2, 3)
    self.evidence_lam = evidence_lambda
    self.negative_lam = negative_lambda
    self.add_entropy = add_entropy

    self.apply(self.init_bert_weights)
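
# --- Quick check of the slice ranges computed above ---
# With the defaults hidden_size=768 and split_num=3, the list comprehension
# produces the ranges shown in the comment below; this snippet only mirrors the
# arithmetic for inspection.
hidden_size, split_num = 768, 3
split_size = hidden_size // split_num
split_sizes = [((i - 1) * split_size, i * split_size) for i in range(1, split_num)] \
              + [((split_num - 1) * split_size, hidden_size)]
print(split_sizes)   # [(0, 256), (256, 512), (512, 768)]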
def __init__(self, config, evidence_lambda=0.8, negative_lambda=1.0,
             add_entropy: bool = False, fix_bert: bool = False):
    super(BertQAYesnoHierarchicalSingleRNN, self).__init__(config)
    logger.info(f'The model {self.__class__.__name__} is loading...')
    logger.info(f'The coefficient of evidence loss is {evidence_lambda}')
    logger.info(f'The coefficient of negative samples loss is {negative_lambda}')
    logger.info(f'Fix parameters of BERT: {fix_bert}')
    logger.info(f'Add entropy loss: {add_entropy}')
    # logger.info(f'Use bidirectional attention before summarizing vectors: {bi_attention}')

    layers.set_seq_dropout(True)
    layers.set_my_dropout_prob(config.hidden_dropout_prob)

    self.bert = BertModel(config)
    # self.dropout = nn.Dropout(config.hidden_dropout_prob)
    # self.answer_choice = nn.Linear(config.hidden_size, 2)
    if fix_bert:
        for param in self.bert.parameters():
            param.requires_grad = False

    self.doc_sen_self_attn = layers.LinearSelfAttnAllennlp(config.hidden_size)
    self.que_self_attn = layers.LinearSelfAttn(config.hidden_size)
    # Bidirectional sentence encoder; hidden_size // 2 per direction, presumably so
    # the concatenated bidirectional output stays at config.hidden_size.
    self.doc_sen_encoder = layers.StackedBRNN(config.hidden_size, config.hidden_size // 2, num_layers=1)

    self.word_similarity = layers.AttentionScore(config.hidden_size, 250, do_similarity=False)
    self.vector_similarity = layers.AttentionScore(config.hidden_size, 250, do_similarity=False)

    # self.yesno_predictor = nn.Linear(config.hidden_size, 2)
    self.yesno_predictor = nn.Linear(config.hidden_size * 2, 3)
    self.evidence_lam = evidence_lambda
    self.negative_lam = negative_lambda
    self.add_entropy = add_entropy

    self.apply(self.init_bert_weights)
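
# --- Shape check for the half-size bidirectional encoder convention ---
# `layers.StackedBRNN` is project-specific, so this sketch uses a plain
# bidirectional GRU as a stand-in purely to show why hidden_size // 2 per
# direction keeps the output width at hidden_size.
import torch
import torch.nn as nn

hidden_size = 768
rnn = nn.GRU(hidden_size, hidden_size // 2, num_layers=1,
             batch_first=True, bidirectional=True)
x = torch.randn(2, 16, hidden_size)      # [batch, num_sentences, hidden]
out, _ = rnn(x)
print(out.shape)                          # torch.Size([2, 16, 768])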
def __init__(self, config, evidence_lambda=0.8, sample_steps: int = 5, reward_func: int = 0, freeze_bert=False):
    super(BertQAYesnoHierarchicalReinforce, self).__init__(config)
    logger.info(f'The model {self.__class__.__name__} is loading...')
    logger.info(f'The coefficient of evidence loss is {evidence_lambda}')
    logger.info(f'Sample steps: {sample_steps}')
    logger.info(f'Reward function: {reward_func}')
    logger.info(f'Freeze BERT\'s parameters: {freeze_bert}')

    layers.set_seq_dropout(True)
    layers.set_my_dropout_prob(config.hidden_dropout_prob)

    self.bert = BertModel(config)
    if freeze_bert:
        for p in self.bert.parameters():
            p.requires_grad = False

    self.doc_sen_self_attn = layers.LinearSelfAttnAllennlp(config.hidden_size)
    self.que_self_attn = layers.LinearSelfAttn(config.hidden_size)

    self.word_similarity = layers.AttentionScore(config.hidden_size, 250, do_similarity=False)
    self.vector_similarity = layers.AttentionScore(config.hidden_size, 250, do_similarity=False)

    self.yesno_predictor = nn.Linear(config.hidden_size * 2, 3)
    self.evidence_lam = evidence_lambda
    self.sample_steps = sample_steps
    # Select one of the two REINFORCE step implementations by index.
    self.reward_func = [self.reinforce_step, self.reinforce_step_1][reward_func]

    self.apply(self.init_bert_weights)
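
# --- Generic REINFORCE sketch (illustrative only) ---
# The repository's `reinforce_step` / `reinforce_step_1` are not shown here, so
# the helper below is a standard single-sample REINFORCE estimator for sentence
# selection, not the model's actual reward function; names and rewards are made up.
import torch

def reinforce_sentence_loss(sentence_logits, rewards):
    """Sample one sentence index per example and weight its log-probability
    by a scalar reward (e.g. 1.0 if the chosen evidence led to a correct answer)."""
    dist = torch.distributions.Categorical(logits=sentence_logits)  # [batch, num_sentences]
    samples = dist.sample()                                         # [batch]
    log_probs = dist.log_prob(samples)                              # [batch]
    return -(rewards * log_probs).mean()

logits = torch.randn(4, 10, requires_grad=True)     # dummy per-sentence scores
rewards = torch.tensor([1.0, 0.0, 1.0, 1.0])        # dummy rewards
loss = reinforce_sentence_loss(logits, rewards)
loss.backward()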
def __init__(self, config, evidence_lambda=0.8, use_gumbel=True, freeze_bert=False):
    super(BertQAYesnoHierarchicalHard, self).__init__(config)
    logger.info(f'The model {self.__class__.__name__} is loading...')
    logger.info(f'The coefficient of evidence loss is {evidence_lambda}')
    logger.info(f'Use gumbel: {use_gumbel}')
    logger.info(f'Freeze BERT\'s parameters: {freeze_bert}')

    layers.set_seq_dropout(True)
    layers.set_my_dropout_prob(config.hidden_dropout_prob)

    self.bert = BertModel(config)
    if freeze_bert:
        for p in self.bert.parameters():
            p.requires_grad = False

    self.doc_sen_self_attn = layers.LinearSelfAttnAllennlp(config.hidden_size)
    self.que_self_attn = layers.LinearSelfAttn(config.hidden_size)

    self.word_similarity = layers.AttentionScore(config.hidden_size, 250, do_similarity=False)
    self.vector_similarity = layers.AttentionScore(config.hidden_size, 250, do_similarity=False)

    self.yesno_predictor = nn.Linear(config.hidden_size * 2, 3)
    self.evidence_lam = evidence_lambda
    self.use_gumbel = use_gumbel

    self.apply(self.init_bert_weights)
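
# --- Straight-through Gumbel-Softmax sketch (illustrative only) ---
# A minimal example of the hard sentence selection the `use_gumbel` flag refers
# to, using torch.nn.functional.gumbel_softmax; the tensors and shapes are dummy
# values, and this is not the repository's forward pass.
import torch
import torch.nn.functional as F

sentence_logits = torch.randn(4, 10)            # [batch, num_sentences], dummy scores
# hard=True yields one-hot selections in the forward pass while the backward
# pass uses the soft (differentiable) probabilities.
selection = F.gumbel_softmax(sentence_logits, tau=1.0, hard=True)
sentence_vectors = torch.randn(4, 10, 768)      # dummy per-sentence representations
evidence = torch.einsum('bs,bsh->bh', selection, sentence_vectors)   # [batch, hidden]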