def prepare_masked_instances_for_humans(sentences, config):
    """Build blanked-out ("_____") versions of the statements.

    Args:
        sentences: Nested mapping ``truism -> perturbation -> premise ->
            statement``. Each statement is a string that is split on ``","``;
            the first piece is kept as the premise text and the second piece
            is the conclusion that gets masked.
        config: Mapping read as ``config[truism]['premise_switch'][k]`` with
            ``k`` in ``'0'``, ``'1'``, ``'2'``; each value is an iterable of
            candidate answer strings.

    Returns:
        A dict keyed by ``"<truism>-<perturbation>-<premise>"`` whose values
        are ``(masked_statement, right_answer, wrong_answer)`` tuples. A
        statement is only included when one candidate was detected in the
        conclusion and another one was not.

    Raises:
        KeyError: If ``config`` lacks the entry needed for a perturbation.
        IndexError: If a statement contains no comma.
    """
    masked_examples = {}
    for truism, perturbations in sentences.items():
        for perturbation, premises in perturbations.items():
            # The perturbation name decides which candidate list applies:
            # no 'paraphrase' -> '0'; 'paraphrase' without '_inversion' -> '1';
            # otherwise -> '2'.
            if 'paraphrase' not in perturbation:
                switch_id = '0'
            elif '_inversion' not in perturbation:
                switch_id = '1'
            else:
                switch_id = '2'
            candidate_answers = config[truism]['premise_switch'][switch_id]

            for premise_id, statement in premises.items():
                key = "-".join([truism, perturbation, premise_id])
                clauses = statement.split(",")
                premise_text, conclusion = clauses[0], clauses[1]

                right_answer = None
                wrong_answer = None
                for answer in candidate_answers:
                    # pad_string is defined outside this block; whatever it
                    # returns for (answer, False) is searched for in the
                    # conclusion to decide whether this is the right answer.
                    if pad_string(answer, False) in conclusion:
                        conclusion = conclusion.replace(
                            " " + answer + " ", " _____ ")
                        right_answer = answer
                    else:
                        wrong_answer = answer

                # Truthiness check: a missing (None) or empty answer on either
                # side means the statement is skipped.
                if not (right_answer and wrong_answer):
                    continue

                # NOTE(review): the conclusion keeps any whitespace that
                # followed the comma, so joining with ", " may produce a
                # double space - confirm this is intended.
                masked_examples[key] = (
                    premise_text + ", " + conclusion,
                    right_answer,
                    wrong_answer,
                )
    return masked_examples
def prep_ft_instances_for_sampling_by_sets(sentences, config,
                                           fictitious_entities,
                                           num_entity_trials):
    """Create (statement, right_answer) pairs with entity names filled in.

    The module-level random generator is re-seeded with 1012 on every call,
    so the sampled entity pairs are reproducible (and later users of the
    global generator are affected by that seed as well).

    Args:
        sentences: Nested mapping ``truism -> perturbation -> premise ->
            statement``; each statement is a string containing at least one
            comma.
        config: Mapping read as ``config[truism]['premise_switch'][k]`` with
            ``k`` in ``'0'``, ``'1'``, ``'2'``; each value is an iterable of
            candidate answer strings.
        fictitious_entities: Sequence of indexable pairs; element 0 replaces
            the standalone token ``A`` and element 1 replaces ``B``.
        num_entity_trials: Number of entity pairs sampled (without
            replacement) for every statement.

    Returns:
        A list of ``(statement_with_entities, right_answer)`` tuples.
        ``right_answer`` is the last candidate detected in the part of the
        statement after the first comma, or ``None`` if none was detected.

    Raises:
        KeyError: If ``config`` lacks the entry needed for a perturbation.
        IndexError: If a statement contains no comma.
        ValueError: If ``num_entity_trials`` exceeds the number of entities.
    """
    random.seed(1012)
    statements = []
    for truism, perturbations in sentences.items():
        for perturbation, premises in perturbations.items():
            # The perturbation name decides which candidate list applies.
            if 'paraphrase' not in perturbation:
                switch_id = '0'
            elif '_inversion' not in perturbation:
                switch_id = '1'
            else:
                switch_id = '2'
            candidate_answers = config[truism]['premise_switch'][switch_id]

            for statement in premises.values():
                conclusion = statement.split(",")[1]

                right_answer = None
                for answer in candidate_answers:
                    # pad_string is defined outside this block; its result
                    # for (answer, False) is searched for in the conclusion.
                    if pad_string(answer, False) in conclusion:
                        right_answer = answer

                # The unmasked statement is emitted once per sampled pair.
                sampled_pairs = random.sample(fictitious_entities,
                                              num_entity_trials)
                for entity_pair in sampled_pairs:
                    with_first = re.sub(r"\bA\b", entity_pair[0], statement)
                    with_both = re.sub(r"\bB\b", entity_pair[1], with_first)
                    statements.append((with_both, right_answer))
    return statements
def prepare_masked_instances(sentences, config, fictitious_entities,
                             num_entity_trials):
    """Build ``<mask>`` versions of the statements with entity names filled in.

    Two input layouts are supported:

    * ``config`` is not ``None``: every leaf of ``sentences`` is the statement
      string, and the candidate answers come from
      ``config[truism]['premise_switch'][k]`` where ``k`` is chosen from the
      perturbation name.
    * ``config`` is ``None``: every leaf of ``sentences`` is a sequence whose
      element 0 is the statement and whose elements 2 and 3 are the two
      candidate answers; leaves equal to ``[]`` are skipped.

    Args:
        sentences: Nested mapping ``truism -> perturbation -> premise ->
            leaf`` (see above for the leaf layout).
        config: Candidate-answer configuration, or ``None``.
        fictitious_entities: Sequence of indexable pairs; element 0 replaces
            the standalone token ``A`` and element 1 replaces ``B``.
        num_entity_trials: Number of entity pairs sampled (without
            replacement) for every statement that gets masked.

    Returns:
        A dict keyed by ``"<truism>-<perturbation>-<premise>"``. Each value
        is a list of ``(masked_statement, right_answer, wrong_answer)``
        tuples, one per sampled entity pair. A statement is only included
        when one candidate was detected in its conclusion and another one
        was not.

    Raises:
        KeyError: If ``config`` lacks the entry needed for a perturbation.
        IndexError: If a statement contains no comma.
        ValueError: If ``num_entity_trials`` exceeds the number of entities.
    """
    masked_examples = {}
    for truism, perturbations in sentences.items():
        for perturbation, premises in perturbations.items():
            if config is not None:
                # The perturbation name decides which candidate list applies.
                if 'paraphrase' not in perturbation:
                    switch_id = '0'
                elif '_inversion' not in perturbation:
                    switch_id = '1'
                else:
                    switch_id = '2'
                candidate_answers = config[truism]['premise_switch'][switch_id]

            for premise_id, entry in premises.items():
                key = "-".join([truism, perturbation, premise_id])

                if config is None:
                    if entry == []:
                        continue
                    candidate_answers = [entry[2], entry[3]]
                    statement = entry[0]
                else:
                    statement = entry

                # Only the first two comma-separated pieces are used.
                clauses = statement.split(",")
                premise_text, conclusion = clauses[0], clauses[1]

                right_answer = None
                wrong_answer = None
                for answer in candidate_answers:
                    # pad_string is defined outside this block; its result
                    # for (answer, False) is searched for in the conclusion.
                    if pad_string(answer, False) in conclusion:
                        conclusion = conclusion.replace(
                            " " + answer + " ", " <mask> ")
                        right_answer = answer
                    else:
                        wrong_answer = answer

                # Truthiness check: a missing (None) or empty answer on either
                # side means the statement is skipped.
                if not (right_answer and wrong_answer):
                    continue

                masked_statement = premise_text + "," + conclusion
                masked_examples[key] = []
                for entity_pair in random.sample(fictitious_entities,
                                                 num_entity_trials):
                    filled = re.sub(r"\bA\b", entity_pair[0], masked_statement)
                    # str.capitalize() upper-cases the first character and
                    # lower-cases everything after it, entity names included.
                    filled = re.sub(r"\bB\b", entity_pair[1],
                                    filled).capitalize()
                    masked_examples[key].append(
                        (filled, right_answer, wrong_answer))
    return masked_examples
def prepare_masked_easy_instances(sentences, config, fictitious_entities,
                                  num_entity_trials):
    """Build ``<mask>`` versions where only the final clause is masked.

    The text after the last comma of each statement is the portion that gets
    masked; everything up to and including that comma is kept verbatim. The
    candidate answers always come from
    ``config[truism]['premise_switch']['0']``, whatever the perturbation.

    Args:
        sentences: Nested mapping ``truism -> perturbation -> premise ->
            statement`` with string statements.
        config: Mapping read as ``config[truism]['premise_switch']['0']``,
            an iterable of candidate answer strings.
        fictitious_entities: Sequence of indexable pairs; element 0 replaces
            the standalone token ``A`` and element 1 replaces ``B``.
        num_entity_trials: Number of entity pairs sampled (without
            replacement) for every statement that gets masked.

    Returns:
        A dict keyed by ``"<truism>-<perturbation>-<premise>"``. Each value
        is a list of ``(masked_statement, right_answer, wrong_answer)``
        tuples, one per sampled entity pair. A statement is only included
        when one candidate was detected in its final clause and another one
        was not.

    Raises:
        KeyError: If ``config`` lacks the entry for a truism.
        ValueError: If ``num_entity_trials`` exceeds the number of entities.
    """
    masked_examples = {}
    for truism, perturbations in sentences.items():
        for perturbation, premises in perturbations.items():
            candidate_answers = config[truism]['premise_switch']['0']

            for premise_id, statement in premises.items():
                key = "-".join([truism, perturbation, premise_id])

                # Split at the last comma. When there is no comma, both head
                # and separator are empty and the whole statement is the
                # portion to mask.
                head, separator, masked_portion = statement.rpartition(",")
                kept_prefix = head + separator

                right_answer = None
                wrong_answer = None
                for answer in candidate_answers:
                    # pad_string is defined outside this block; its result
                    # for (answer, False) is searched for in the final clause.
                    if pad_string(answer, False) in masked_portion:
                        masked_portion = masked_portion.replace(
                            " " + answer + " ", " <mask> ")
                        right_answer = answer
                    else:
                        wrong_answer = answer

                # Truthiness check: a missing (None) or empty answer on either
                # side means the statement is skipped.
                if not (right_answer and wrong_answer):
                    continue

                masked_statement = kept_prefix + masked_portion
                masked_examples[key] = []
                for entity_pair in random.sample(fictitious_entities,
                                                 num_entity_trials):
                    filled = re.sub(r"\bA\b", entity_pair[0], masked_statement)
                    # str.capitalize() upper-cases the first character and
                    # lower-cases everything after it, entity names included.
                    filled = re.sub(r"\bB\b", entity_pair[1],
                                    filled).capitalize()
                    masked_examples[key].append(
                        (filled, right_answer, wrong_answer))
    return masked_examples