def __init__(self, config, evidence_lambda=0.8, negative_lambda=1.0):
    """Hierarchical yes/no QA model with an extra negative-sample loss.

    Args:
        config: BERT model configuration.
        evidence_lambda: weight of the evidence (sentence supervision) loss.
        negative_lambda: weight of the negative-sample loss.
    """
    super(BertQAYesnoHierarchicalNeg, self).__init__(config)
    print(f'The model {self.__class__.__name__} is loading...')
    print(f'The coefficient of evidence loss is {evidence_lambda}')
    # Fix: negative_lambda was accepted and stored but never reported,
    # unlike the sibling NegHalf model which prints this same message.
    print(f'The coefficient of negative samples loss is {negative_lambda}')
    layers.set_seq_dropout(True)
    layers.set_my_dropout_prob(config.hidden_dropout_prob)
    self.bert = BertModel(config)
    # Sentence- and question-level summarization attention.
    self.doc_sen_self_attn = layers.LinearSelfAttnAllennlp(config.hidden_size)
    self.que_self_attn = layers.LinearSelfAttn(config.hidden_size)
    # Attention scores with a 250-dim projection.
    self.word_similarity = layers.AttentionScore(config.hidden_size, 250, do_similarity=False)
    self.vector_similarity = layers.AttentionScore(config.hidden_size, 250, do_similarity=False)
    # 3-class predictor over concatenated (2 * hidden) features.
    self.yesno_predictor = nn.Linear(config.hidden_size * 2, 3)
    self.evidence_lam = evidence_lambda
    self.negative_lam = negative_lambda
    self.apply(self.init_bert_weights)
def __init__(self, config, evidence_lambda=0.8, freeze_predictor: bool = False):
    """Build the hierarchical top-k yes/no QA model (fp32 variant).

    Args:
        config: BERT model configuration.
        evidence_lambda: weight of the evidence (sentence supervision) loss.
        freeze_predictor: if True, the final yes/no classifier is excluded
            from gradient updates.
    """
    super(BertQAYesnoHierarchicalTopKfp32, self).__init__(config)
    print(f'The model {self.__class__.__name__} is loading...')
    print(f'The coefficient of evidence loss is {evidence_lambda}')
    # Configure the shared dropout behaviour of the layer utilities.
    layers.set_seq_dropout(True)
    layers.set_my_dropout_prob(config.hidden_dropout_prob)
    self.bert = BertModel(config)
    dim = config.hidden_size
    self.doc_sen_self_attn = layers.LinearSelfAttnAllennlp(dim)
    self.que_self_attn = layers.LinearSelfAttn(dim)
    self.word_similarity = layers.AttentionScore(dim, 250, do_similarity=False)
    self.vector_similarity = layers.AttentionScore(dim, 250, do_similarity=False)
    # 3-class predictor over concatenated (2 * hidden) features.
    self.yesno_predictor = nn.Linear(dim * 2, 3)
    self.evidence_lam = evidence_lambda
    self.freeze_predictor = freeze_predictor
    if freeze_predictor:
        # Stop gradient flow through the final classifier.
        for p in self.yesno_predictor.parameters():
            p.requires_grad = False
    self.apply(self.init_bert_weights)
def __init__(self, config, evidence_lambda=0.8, freeze_predictor: bool = False):
    """Build the hierarchical top-k yes/no QA model.

    Args:
        config: BERT model configuration.
        evidence_lambda: weight of the evidence (sentence supervision) loss.
        freeze_predictor: if True, the yes/no classifier's parameters are
            frozen (no gradient updates).
    """
    super(BertQAYesnoHierarchicalTopK, self).__init__(config)
    logger.info(f'The model {self.__class__.__name__} is loading...')
    logger.info(f'The coefficient of evidence loss is {evidence_lambda}')
    # Propagate dropout settings to both layer utility modules.
    for util in (layers, rep_layers):
        util.set_seq_dropout(True)
        util.set_my_dropout_prob(config.hidden_dropout_prob)
    self.bert = BertModel(config)
    dim = config.hidden_size
    self.doc_sen_self_attn = rep_layers.LinearSelfAttention(dim)
    self.que_self_attn = rep_layers.LinearSelfAttention(dim)
    self.word_similarity = layers.AttentionScore(dim, 250, do_similarity=False)
    self.vector_similarity = layers.AttentionScore(dim, 250, do_similarity=False)
    # 3-class predictor over concatenated (2 * hidden) features.
    self.yesno_predictor = nn.Linear(dim * 2, 3)
    self.evidence_lam = evidence_lambda
    self.freeze_predictor = freeze_predictor
    if self.freeze_predictor:
        logger.info(f"Freeze parameters of yes_no predictor")
        for weight in self.yesno_predictor.parameters():
            weight.requires_grad = False
    self.apply(self.init_bert_weights)
def __init__(self, config, evidence_lambda=0.8, view_id=1):
    """Build the fp16 two-view top-k yes/no model.

    Args:
        config: BERT configuration; its ``hidden_size`` is halved in place
            after the encoder is built, so all downstream sub-modules
            operate on half-width representations.
        evidence_lambda: weight of the evidence loss.
        view_id: index of the view this instance handles (consumed
            downstream).
    """
    super(BertQAYesnoHierarchicalTwoViewTopKfp16, self).__init__(config)
    print(f'The model {self.__class__.__name__} is loading...')
    print(f'The coefficient of evidence loss is {evidence_lambda}')
    print(f'The coefficient of view id is {view_id}')
    # Propagate dropout settings to both layer utility modules.
    for util in (layers, rep_layers):
        util.set_seq_dropout(True)
        util.set_my_dropout_prob(config.hidden_dropout_prob)
    self.bert = BertModel(config)
    # NOTE(review): mutates the shared config in place; any later reader of
    # config.hidden_size sees the halved value — confirm this is intended.
    config.hidden_size = int(config.hidden_size / 2)
    half_dim = config.hidden_size
    self.doc_sen_self_attn = rep_layers.LinearSelfAttention(half_dim)
    self.que_self_attn = rep_layers.LinearSelfAttention(half_dim)
    self.word_similarity = layers.AttentionScore(half_dim, 250, do_similarity=False)
    self.vector_similarity = layers.AttentionScore(half_dim, 250, do_similarity=False)
    # 3-class predictor over concatenated (2 * half_dim) features.
    self.yesno_predictor = nn.Linear(half_dim * 2, 3)
    self.evidence_lam = evidence_lambda
    self.view_id = view_id
    self.apply(self.init_bert_weights)
def __init__(self, config, evidence_lambda=0.8, view_id=1, split_type=0):
    """Two-view top-k model: each view reads half of the 768 hidden dims.

    ``split_type`` controls how the 768 dimensions are partitioned:
        0 - contiguous halves,
        1 - even-indexed vs. odd-indexed dimensions,
        2/3 - random halves under a fixed seed (one seed per split type,
              so the partition is reproducible across runs).

    Raises:
        ValueError: if ``split_type`` is not one of 0/1/2/3.
    """
    super(BertQAYesnoHierarchicalTwoViewTopK, self).__init__(config)
    print(f'The model {self.__class__.__name__} is loading...')
    print(f'The coefficient of evidence loss is {evidence_lambda}')
    print(f'The coefficient of view id is {view_id}')
    total_dim = 768
    rank_list = list(range(total_dim))
    self.split_type = split_type
    half = int(.5 * total_dim)
    if split_type == 0:
        self.view_ranks = [rank_list[:half], rank_list[half:]]
    elif split_type == 1:
        self.view_ranks = [rank_list[::2], rank_list[1::2]]
    elif split_type in (2, 3):
        # Fixed seeds keep the shuffled partition deterministic.
        random.seed(19970417 if split_type == 2 else 20190914)
        random.shuffle(rank_list)
        self.view_ranks = [rank_list[:half], rank_list[half:]]
    else:
        raise ValueError("split type should be 0/1/2/3, but found %d" % (split_type))
    print(self.view_ranks[view_id][:20])
    layers.set_seq_dropout(True)
    layers.set_my_dropout_prob(config.hidden_dropout_prob)
    self.bert = BertModel(config)
    # NOTE(review): halves the shared config in place; later readers of
    # config.hidden_size see the reduced value — confirm this is intended.
    config.hidden_size = int(config.hidden_size / 2)
    half_dim = config.hidden_size
    self.doc_sen_self_attn = layers.LinearSelfAttnAllennlp(half_dim)
    self.que_self_attn = layers.LinearSelfAttn(half_dim)
    self.word_similarity = layers.AttentionScore(half_dim, 250, do_similarity=False)
    self.vector_similarity = layers.AttentionScore(half_dim, 250, do_similarity=False)
    # 3-class predictor over concatenated (2 * half_dim) features.
    self.yesno_predictor = nn.Linear(half_dim * 2, 3)
    self.evidence_lam = evidence_lambda
    self.view_id = view_id
    self.apply(self.init_bert_weights)
def __init__(self, config, evidence_lambda=0.8, negative_lambda=1.0, add_entropy: bool = False, split_num: int = 3, split_index: int = 0):
    """Hierarchical yes/no model reading one slice of BERT's hidden state.

    The encoder output is partitioned into ``split_num`` contiguous slices
    and slice ``split_index`` feeds the downstream attention/classifier
    stack.

    Args:
        config: BERT model configuration.
        evidence_lambda: weight of the evidence loss.
        negative_lambda: weight of the negative-sample loss.
        add_entropy: whether the extra entropy loss term is enabled.
        split_num: number of slices the hidden state is divided into.
        split_index: which slice this model instance reads.
    """
    super(BertQAYesnoHierarchicalNegHalf, self).__init__(config)
    print(f'The model {self.__class__.__name__} is loading...')
    print(f'The coefficient of evidence loss is {evidence_lambda}')
    print(f'The coefficient of negative samples loss is {negative_lambda}')
    print(f'Add entropy loss: {add_entropy}')
    layers.set_seq_dropout(True)
    layers.set_my_dropout_prob(config.hidden_dropout_prob)
    self.bert = BertModel(config)
    self.split_num = split_num
    self.split_size = config.hidden_size // self.split_num
    self.split_index = split_index
    # (start, end) offsets of every slice; the final slice absorbs the
    # remainder when hidden_size is not divisible by split_num.
    bounds = [(k * self.split_size, (k + 1) * self.split_size) for k in range(split_num - 1)]
    bounds.append(((split_num - 1) * self.split_size, config.hidden_size))
    self.split_sizes = bounds
    print(f'Split BERT output into {self.split_num}.')
    print(f'Current model use the {self.split_index}th hidden state.')
    print(f'Read hidden state in {self.split_sizes[self.split_index]}.')
    # NOTE(review): all sub-modules below are sized split_size, but the last
    # slice can be wider when hidden_size % split_num != 0 — confirm only
    # divisible configurations are used.
    self.doc_sen_self_attn = layers.LinearSelfAttnAllennlp(self.split_size)
    self.que_self_attn = layers.LinearSelfAttn(self.split_size)
    self.word_similarity = layers.AttentionScore(self.split_size, 250, do_similarity=False)
    self.vector_similarity = layers.AttentionScore(self.split_size, 250, do_similarity=False)
    # 3-class predictor over concatenated (2 * split_size) features.
    self.yesno_predictor = nn.Linear(self.split_size * 2, 3)
    self.evidence_lam = evidence_lambda
    self.negative_lam = negative_lambda
    self.add_entropy = add_entropy
    self.apply(self.init_bert_weights)
def __init__(self, config, evidence_lambda=0.8, negative_lambda=1.0, add_entropy: bool = False, fix_bert: bool = False):
    """Hierarchical yes/no model with an RNN over sentence representations.

    Args:
        config: BERT model configuration.
        evidence_lambda: weight of the evidence loss.
        negative_lambda: weight of the negative-sample loss.
        add_entropy: whether the extra entropy loss term is enabled.
        fix_bert: if True, BERT's parameters are frozen.
    """
    super(BertQAYesnoHierarchicalSingleRNN, self).__init__(config)
    logger.info(f'The model {self.__class__.__name__} is loading...')
    logger.info(f'The coefficient of evidence loss is {evidence_lambda}')
    logger.info(f'The coefficient of negative samples loss is {negative_lambda}')
    logger.info(f'Fix parameters of BERT: {fix_bert}')
    logger.info(f'Add entropy loss: {add_entropy}')
    layers.set_seq_dropout(True)
    layers.set_my_dropout_prob(config.hidden_dropout_prob)
    self.bert = BertModel(config)
    if fix_bert:
        # Freeze the encoder; only the task head trains.
        for weight in self.bert.parameters():
            weight.requires_grad = False
    dim = config.hidden_size
    self.doc_sen_self_attn = layers.LinearSelfAttnAllennlp(dim)
    self.que_self_attn = layers.LinearSelfAttn(dim)
    # Single-layer RNN encoder over sentence representations; hidden size
    # dim // 2 per direction keeps the output width at dim.
    self.doc_sen_encoder = layers.StackedBRNN(dim, dim // 2, num_layers=1)
    self.word_similarity = layers.AttentionScore(dim, 250, do_similarity=False)
    self.vector_similarity = layers.AttentionScore(dim, 250, do_similarity=False)
    # 3-class predictor over concatenated (2 * hidden) features.
    self.yesno_predictor = nn.Linear(dim * 2, 3)
    self.evidence_lam = evidence_lambda
    self.negative_lam = negative_lambda
    self.add_entropy = add_entropy
    self.apply(self.init_bert_weights)
def __init__(self, config, evidence_lambda=0.8, num_choices=4, sample_steps: int = 5, reward_func: int = 0, freeze_bert=False):
    """REINFORCE-based hierarchical model for RACE-style multiple choice.

    Args:
        config: BERT model configuration.
        evidence_lambda: weight of the evidence loss.
        num_choices: number of answer options per question.
        sample_steps: number of sampling steps used by the estimator.
        reward_func: 0 selects ``self.reinforce_step``,
            1 selects ``self.reinforce_step_1``.
        freeze_bert: if True, BERT's parameters are frozen.

    Raises:
        ValueError: if ``reward_func`` is not 0 or 1.
    """
    # Fail fast: previously any negative index (e.g. -1) silently selected
    # a reward function through Python's negative list indexing.
    if reward_func not in (0, 1):
        raise ValueError(f'reward_func must be 0 or 1, but found {reward_func}')
    super(BertQAYesnoHierarchicalReinforceRACE, self).__init__(config)
    logger.info(f'The model {self.__class__.__name__} is loading...')
    logger.info(f'The coefficient of evidence loss is {evidence_lambda}')
    logger.info(f'Currently the number of choices is {num_choices}')
    logger.info(f'Sample steps: {sample_steps}')
    logger.info(f'Reward function: {reward_func}')
    logger.info(f'If freeze BERT\'s parameters: {freeze_bert} ')
    layers.set_seq_dropout(True)
    layers.set_my_dropout_prob(config.hidden_dropout_prob)
    rep_layers.set_seq_dropout(True)
    rep_layers.set_my_dropout_prob(config.hidden_dropout_prob)
    self.bert = BertModel(config)
    if freeze_bert:
        # Encoder is frozen; only the task-specific modules get gradients.
        for p in self.bert.parameters():
            p.requires_grad = False
    self.doc_sen_self_attn = rep_layers.LinearSelfAttention(config.hidden_size)
    self.que_self_attn = rep_layers.LinearSelfAttention(config.hidden_size)
    self.word_similarity = layers.AttentionScore(config.hidden_size, 250, do_similarity=False)
    self.vector_similarity = layers.AttentionScore(config.hidden_size, 250, do_similarity=False)
    # One logit per choice over concatenated (2 * hidden) features.
    self.classifier = nn.Linear(config.hidden_size * 2, 1)
    self.evidence_lam = evidence_lambda
    self.sample_steps = sample_steps
    self.reward_func = [self.reinforce_step, self.reinforce_step_1][reward_func]
    self.num_choices = num_choices
    self.apply(self.init_bert_weights)
def __init__(self, config, evidence_lambda=0.8, num_choices=4, use_gumbel=True, freeze_bert=False):
    """Hard-selection hierarchical model for RACE-style multiple choice.

    Args:
        config: BERT model configuration.
        evidence_lambda: weight of the evidence loss.
        num_choices: number of answer options per question.
        use_gumbel: whether Gumbel sampling is enabled downstream.
        freeze_bert: if True, BERT's parameters are frozen.
    """
    super(BertQAYesnoHierarchicalHardRACE, self).__init__(config)
    logger.info(f'The model {self.__class__.__name__} is loading...')
    logger.info(f'The coefficient of evidence loss is {evidence_lambda}')
    logger.info(f'Currently the number of choices is {num_choices}')
    logger.info(f'Use gumbel: {use_gumbel}')
    logger.info(f'If freeze BERT\'s parameters: {freeze_bert} ')
    # Propagate dropout settings to both layer utility modules.
    for util in (layers, rep_layers):
        util.set_seq_dropout(True)
        util.set_my_dropout_prob(config.hidden_dropout_prob)
    self.bert = BertModel(config)
    if freeze_bert:
        # Encoder is frozen; only the task-specific modules get gradients.
        for weight in self.bert.parameters():
            weight.requires_grad = False
    dim = config.hidden_size
    self.doc_sen_self_attn = rep_layers.LinearSelfAttention(dim)
    self.que_self_attn = rep_layers.LinearSelfAttention(dim)
    self.word_similarity = layers.AttentionScore(dim, 250, do_similarity=False)
    self.vector_similarity = layers.AttentionScore(dim, 250, do_similarity=False)
    # One logit per choice over concatenated (2 * hidden) features.
    self.classifier = nn.Linear(dim * 2, 1)
    self.evidence_lam = evidence_lambda
    self.use_gumbel = use_gumbel
    self.num_choices = num_choices
    self.apply(self.init_bert_weights)
def __init__(self, config, evidence_lambda=0.8):
    """RoBERTa-based hierarchical top-k yes/no QA model.

    Args:
        config: RoBERTa model configuration.
        evidence_lambda: weight of the evidence (sentence supervision) loss.
    """
    super(RobertaQAYesnoHierarchicalTopK, self).__init__(config)
    print(f'The model {self.__class__.__name__} is loading...')
    print(f'The coefficient of evidence loss is {evidence_lambda}')
    # Propagate dropout settings to both layer utility modules.
    for util in (layers, rep_layers):
        util.set_seq_dropout(True)
        util.set_my_dropout_prob(config.hidden_dropout_prob)
    self.roberta = RobertaModel(config)
    dim = config.hidden_size
    self.doc_sen_self_attn = rep_layers.LinearSelfAttention(dim)
    self.que_self_attn = rep_layers.LinearSelfAttention(dim)
    self.word_similarity = layers.AttentionScore(dim, 250, do_similarity=False)
    self.vector_similarity = layers.AttentionScore(dim, 250, do_similarity=False)
    # 3-class predictor over concatenated (2 * hidden) features.
    self.yesno_predictor = nn.Linear(dim * 2, 3)
    self.evidence_lam = evidence_lambda
    self.init_weights()
def __init__(self, config, cls_sup: bool = False, evidence_lambda=0.8, extra_yesno_lambda=0.5):
    """Hierarchical yes/no model with optional extra CLS supervision.

    Args:
        config: BERT model configuration.
        cls_sup: if True, an auxiliary 3-way predictor is created for the
            extra supervision signal.
        evidence_lambda: weight of the evidence loss.
        extra_yesno_lambda: weight of the extra yes/no loss.
    """
    super(BertQAYesnoCLSHierarchical, self).__init__(config)
    print(f'The model {self.__class__.__name__} is loading...')
    print(f'The coefficient of evidence loss is {evidence_lambda}')
    print(f'Use cls extra supervision: {cls_sup}')
    print(f'The extra yesno loss lambda is {extra_yesno_lambda}')
    layers.set_seq_dropout(True)
    layers.set_my_dropout_prob(config.hidden_dropout_prob)
    self.bert = BertModel(config)
    dim = config.hidden_size
    # Attention modules summarizing words and sentences.
    self.doc_word_sum = layers.AttentionScore(dim, 250, do_similarity=False)
    self.que_word_sum = layers.AttentionScore(dim, 250, do_similarity=False)
    self.doc_sen_sum = layers.AttentionScore(dim, 250, do_similarity=False)
    self.cls_sup = cls_sup
    self.extra_yesno_lam = extra_yesno_lambda
    if cls_sup:
        # Auxiliary head only exists when CLS supervision is enabled.
        self.extra_predictor = nn.Linear(dim, 3)
    # Main 3-class predictor over concatenated (2 * hidden) features.
    self.yesno_predictor = nn.Linear(dim * 2, 3)
    self.evidence_lam = evidence_lambda
    self.apply(self.init_bert_weights)