Example #1
    def __init__(self, gold_file_path, predictions_file_path):
        """
        Evaluates the results of a StereoSet predictions file with respect to the gold label file.

        Args:
            - gold_file_path: path, relative or absolute, to the gold file
            - predictions_file_path : path, relative or absolute, to the predictions file

        Returns:
            - overall, a dictionary of composite scores for intersentence and intrasentence
        """
        # cluster ID, gold_label to sentence ID
        stereoset = dataloader.StereoSet(gold_file_path) 
        self.intersentence_examples = stereoset.get_intersentence_examples() 
        self.intrasentence_examples = stereoset.get_intrasentence_examples() 
        self.id2term = {}
        self.id2gold = {}
        self.id2score = {}
        self.example2sent = {}
        self.domain2example = {"intersentence": defaultdict(lambda: []), 
                               "intrasentence": defaultdict(lambda: [])}

        with open(predictions_file_path) as f:
            self.predictions = json.load(f)

        for example in self.intrasentence_examples:
            for sentence in example.sentences:
                self.id2term[sentence.ID] = example.target
                self.id2gold[sentence.ID] = sentence.gold_label
                self.example2sent[(example.ID, sentence.gold_label)] = sentence.ID
                self.domain2example['intrasentence'][example.bias_type].append(example)

        for example in self.intersentence_examples:
            for sentence in example.sentences:
                self.id2term[sentence.ID] = example.target
                self.id2gold[sentence.ID] = sentence.gold_label
                self.example2sent[(example.ID, sentence.gold_label)] = sentence.ID
                self.domain2example['intersentence'][example.bias_type].append(example)

        for sent in self.predictions.get('intrasentence', []) + self.predictions.get('intersentence', []):
            self.id2score[sent['id']] = sent['score']

        results = defaultdict(lambda: {})

        for split in ['intrasentence', 'intersentence']:
            for domain in ['gender', 'profession', 'race', 'religion']:
                results[split][domain] = self.evaluate(self.domain2example[split][domain])

        results['intersentence']['overall'] = self.evaluate(self.intersentence_examples) 
        results['intrasentence']['overall'] = self.evaluate(self.intrasentence_examples) 
        results['overall'] = self.evaluate(self.intersentence_examples + self.intrasentence_examples)
        self.results = results
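For reference, the loop over self.predictions above implies the layout of the predictions file: top-level "intrasentence" and "intersentence" lists whose entries each carry a sentence id and a score. A minimal sketch of writing such a file, with placeholder ids that would have to match sentence IDs from the gold StereoSet file:

import json

# Minimal sketch of a predictions file this evaluator can consume;
# the ids below are placeholders, not real StereoSet sentence IDs.
predictions = {
    "intrasentence": [
        {"id": "sentence-id-1", "score": 0.73},
        {"id": "sentence-id-2", "score": 0.12},
    ],
    "intersentence": [
        {"id": "sentence-id-3", "score": 0.98},
    ],
}

with open("predictions.json", "w") as f:
    json.dump(predictions, f, indent=2)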
Example #2
    def __init__(self, tokenizer, args): 
        self.tokenizer = tokenizer
        filename = args.input_file
        dataset = dataloader.StereoSet(filename)
        # empirical maximum sequence length observed during preprocessing
        self.emp_max_seq_length = float("-inf")
        self.max_seq_length = args.max_seq_length
        self.batch_size = args.batch_size

        if self.tokenizer.__class__.__name__ == "XLNetTokenizer":
            # XLNet prompt/padding text; note that it is currently disabled by
            # the assignment to None immediately after the string.
            self.prepend_text = """ In 1991, the remains of Russian Tsar Nicholas II and his family
		(except for Alexei and Maria) are discovered.
		The voice of Nicholas's young son, Tsarevich Alexei Nikolaevich, narrates the
		remainder of the story. 1883 Western Siberia,
		a young Grigori Rasputin is asked by his father and a group of men to perform magic.
		Rasputin has a vision and denounces one of the men as a horse thief. Although his
		father initially slaps him for making such an accusation, Rasputin watches as the
		man is chased outside and beaten. Twenty years later, Rasputin sees a vision of
		the Virgin Mary, prompting him to become a priest. Rasputin quickly becomes famous,
		with people, even a bishop, begging for his blessing. <eod> </s> <eos> """
            self.prepend_text = None
        else:
            self.prepend_text = None

        intersentence_examples = dataset.get_intersentence_examples()

        self.preprocessed = [] 
        for example in intersentence_examples:
            context = example.context
            if self.prepend_text is not None:
                context = self.prepend_text + context 
            for sentence in example.sentences:
                if self.tokenizer.__class__.__name__ in ["XLNetTokenizer", "RobertaTokenizer"]:
                    # support legacy pretrained NSP heads!
                    input_ids, token_type_ids = self._tokenize(context, sentence.sentence)
                    attention_mask = [1 for _ in input_ids] 
                    self.preprocessed.append((input_ids, token_type_ids, attention_mask, sentence.ID))  
                else:
                    s = f"{context} {sentence.sentence}"
                    pad_to_max_length = self.batch_size>1
                    encoded_dict = self.tokenizer.encode_plus(text=context, text_pair=sentence.sentence, truncation=True, add_special_tokens=True, max_length=self.max_seq_length, truncation_strategy="longest_first", pad_to_max_length=pad_to_max_length, return_tensors="pt", return_token_type_ids=True, return_attention_mask=True, return_overflowing_tokens=False, return_special_tokens_mask=False)
                    # prior tokenization
                    # input_ids, position_ids, attention_mask = self._tokenize(context, sentence)

                    input_ids = encoded_dict['input_ids']
                    token_type_ids = encoded_dict['token_type_ids']
                    attention_mask = encoded_dict['attention_mask']
                    self.preprocessed.append((input_ids, token_type_ids, attention_mask, sentence.ID))

        print(f"Maximum sequence length found: {self.emp_max_seq_length}")
Example #3
    def __init__(self, pretrained_class="gpt2", no_cuda=False, batch_size=51, input_file="data/bias.json",
                 intrasentence_model="GPT2LM", intrasentence_load_path=None, intersentence_model="ModelNSP",
                 intersentence_load_path=None, tokenizer="GPT2Tokenizer", unconditional_start_token="<|endoftext|>",
                 skip_intrasentence=False, skip_intersentence=False, max_seq_length=64, small=False,
                 output_dir="predictions/"):
        print(f"Loading {input_file}...")
        self.BATCH_SIZE = batch_size
        filename = os.path.abspath(input_file)
        self.dataloader = dataloader.StereoSet(filename)
        self.cuda = not no_cuda
        self.device = "cuda" if self.cuda else "cpu"
        self.SKIP_INTERSENTENCE = skip_intersentence
        self.SKIP_INTRASENTENCE = skip_intrasentence
        self.UNCONDITIONAL_START_TOKEN = unconditional_start_token

        # store pruning information
        self.PRUNE_PERCENT = prune_percent
        self.STORE_WEIGHT_LOCATION = store_weight_location

        self.PRETRAINED_CLASS = pretrained_class
        self.TOKENIZER = tokenizer
        self.tokenizer = getattr(transformers, self.TOKENIZER).from_pretrained(
            self.PRETRAINED_CLASS)

        self.INTRASENTENCE_MODEL = intrasentence_model
        self.INTRASENTENCE_LOAD_PATH = intrasentence_load_path
        self.INTERSENTENCE_MODEL = intersentence_model
        self.INTERSENTENCE_LOAD_PATH = intersentence_load_path
        self.max_seq_length = max_seq_length

        print("---------------------------------------------------------------")
        print(
            f"{Fore.LIGHTCYAN_EX}                     ARGUMENTS                 {Style.RESET_ALL}")
        print(
            f"{Fore.LIGHTCYAN_EX}Pretrained class:{Style.RESET_ALL} {pretrained_class}")
        print(f"{Fore.LIGHTCYAN_EX}Unconditional Start Token: {Style.RESET_ALL} {self.UNCONDITIONAL_START_TOKEN}")
        print(f"{Fore.LIGHTCYAN_EX}Tokenizer:{Style.RESET_ALL} {tokenizer}")
        print(
            f"{Fore.LIGHTCYAN_EX}Skip Intrasentence:{Style.RESET_ALL} {self.SKIP_INTRASENTENCE}")
        print(
            f"{Fore.LIGHTCYAN_EX}Intrasentence Model:{Style.RESET_ALL} {self.INTRASENTENCE_MODEL}")
        print(
            f"{Fore.LIGHTCYAN_EX}Skip Intersentence:{Style.RESET_ALL} {self.SKIP_INTERSENTENCE}")
        print(
            f"{Fore.LIGHTCYAN_EX}Intersentence Model:{Style.RESET_ALL} {self.INTERSENTENCE_MODEL}")
        print(f"{Fore.LIGHTCYAN_EX}CUDA:{Style.RESET_ALL} {self.cuda}")
        print("---------------------------------------------------------------")
Example #4
def main(args):
    filename = args.input_file
    dataset = dataloader.StereoSet(filename, ignore_harm=True)

    intrasentence_examples = dataset.get_intrasentence_examples()
    intersentence_examples = dataset.get_intersentence_examples()
    c = defaultdict(lambda: Counter())

    for example in intrasentence_examples:
        c[example.bias_type][example.target] += 1

    for example in intersentence_examples:
        c[example.bias_type][example.target] += 1

    for domain, term in c.items():
        print()
        print(domain)
        for k, v in sorted(term.items(), key=lambda x: x[1], reverse=True):
            print(f"{k}: {v}")
    print()
Example #5
    def __init__(self,
                 pretrained_class="bert-large-uncased-whole-word-masking",
                 no_cuda=False,
                 input_file="data/bias.json",
                 intrasentence_model="BertLM",
                 intersentence_model="BertNextSentence",
                 tokenizer="BertTokenizer",
                 intersentence_load_path=None,
                 intrasentence_load_path=None,
                 skip_intrasentence=False,
                 skip_intersentence=False,
                 batch_size=1,
                 max_seq_length=128,
                 output_dir="predictions/",
                 output_file="predictions.json"):
        print(f"Loading {input_file}...")
        filename = os.path.abspath(input_file)
        self.dataloader = dataloader.StereoSet(filename)
        self.cuda = not no_cuda
        self.device = "cuda" if self.cuda else "cpu"

        self.INTRASENTENCE_LOAD_PATH = intrasentence_load_path
        self.INTERSENTENCE_LOAD_PATH = intersentence_load_path
        self.SKIP_INTERSENTENCE = skip_intersentence
        self.SKIP_INTRASENTENCE = skip_intrasentence

        self.PRETRAINED_CLASS = pretrained_class
        self.TOKENIZER = tokenizer
        self.tokenizer = getattr(transformers, self.TOKENIZER).from_pretrained(
            self.PRETRAINED_CLASS, padding_side="right")

        # to keep padding consistent with the other models -> improves LM score.
        if self.tokenizer.__class__.__name__ == "XLNetTokenizer":
            self.tokenizer.padding_side = "right"
        self.MASK_TOKEN = self.tokenizer.mask_token

        # A batch size of 1 effectively disables batching; max_seq_length is then left as None below.
        self.batch_size = batch_size
        self.max_seq_length = None if self.batch_size == 1 else max_seq_length

        self.MASK_TOKEN_IDX = self.tokenizer.encode(self.MASK_TOKEN,
                                                    add_special_tokens=False)
        assert len(self.MASK_TOKEN_IDX) == 1
        self.MASK_TOKEN_IDX = self.MASK_TOKEN_IDX[0]

        self.INTRASENTENCE_MODEL = intrasentence_model
        self.INTERSENTENCE_MODEL = intersentence_model

        print(
            "---------------------------------------------------------------")
        print(
            f"{Fore.LIGHTCYAN_EX}                     ARGUMENTS                 {Style.RESET_ALL}"
        )
        print(
            f"{Fore.LIGHTCYAN_EX}Pretrained class:{Style.RESET_ALL} {pretrained_class}"
        )
        print(
            f"{Fore.LIGHTCYAN_EX}Mask Token:{Style.RESET_ALL} {self.MASK_TOKEN}"
        )
        print(f"{Fore.LIGHTCYAN_EX}Tokenizer:{Style.RESET_ALL} {tokenizer}")
        print(
            f"{Fore.LIGHTCYAN_EX}Skip Intrasentence:{Style.RESET_ALL} {self.SKIP_INTRASENTENCE}"
        )
        print(
            f"{Fore.LIGHTCYAN_EX}Intrasentence Model:{Style.RESET_ALL} {self.INTRASENTENCE_MODEL}"
        )
        print(
            f"{Fore.LIGHTCYAN_EX}Skip Intersentence:{Style.RESET_ALL} {self.SKIP_INTERSENTENCE}"
        )
        print(
            f"{Fore.LIGHTCYAN_EX}Intersentence Model:{Style.RESET_ALL} {self.INTERSENTENCE_MODEL}"
        )
        print(f"{Fore.LIGHTCYAN_EX}CUDA:{Style.RESET_ALL} {self.cuda}")
        print(
            "---------------------------------------------------------------")
Example #6
def main(args):
    model_predictions = defaultdict(lambda: {})
    predictions_dir = args.predictions_dir
    if args.predictions_dir[-1] != "/":
        predictions_dir = args.predictions_dir + "/"

    for model_file in glob(predictions_dir + "*.json"):
        print()
        print(f"Ingesting {model_file}...")
        with open(model_file, "r+") as f:
            model_preds = json.load(f)

        id2score = {}
        for p in model_preds['intersentence'] + model_preds['intrasentence']:
            id2score[p['id']] = p['score']

        intersentence_ids = set()
        for p in model_preds['intersentence']:
            intersentence_ids.add(p['id'])

        pretrained_class = os.path.basename(model_file).split("_")[1]
        model_predictions[pretrained_class] = id2score

    predictions = Counter()
    stereoset = dataloader.StereoSet(args.gold_file)
    examples = stereoset.get_intrasentence_examples(
    ) + stereoset.get_intersentence_examples()
    unrelateds = set()

    BERT_INTERSENTENCE_WEIGHT = 35.0
    GPT_INTERSENTENCE_WEIGHT = 15.0
    BERT_INTRASENTENCE_WEIGHT = 1.0
    GPT_INTRASENTENCE_WEIGHT = 10000.0

    for example in examples:
        assert len(example.sentences) == 3
        for (pair_a, pair_b) in [(0, 1), (1, 2), (2, 0)]:
            for k in ['gpt2-large', "bert-large-cased", "gpt2-medium"]:
                v = model_predictions[k]
                id_a = example.sentences[pair_a].ID
                id_b = example.sentences[pair_b].ID

                for pair_x, id_x in [(pair_a, id_a), (pair_b, id_b)]:
                    if example.sentences[pair_x].gold_label == "unrelated":
                        unrelateds.add(id_x)

                prediction_a = v[id_a]
                prediction_b = v[id_b]

                if id_a not in predictions:
                    predictions[id_a] = 0
                if id_b not in predictions:
                    predictions[id_b] = 0

                if id_a in intersentence_ids:
                    if prediction_a == prediction_b:
                        pass
                    elif prediction_a > prediction_b:
                        if 'gpt2' in k:
                            predictions[id_a] += GPT_INTERSENTENCE_WEIGHT * (
                                prediction_a)
                        else:
                            predictions[id_a] += BERT_INTERSENTENCE_WEIGHT * (
                                prediction_a)
                    else:
                        if 'gpt2' in k:
                            predictions[id_b] += GPT_INTERSENTENCE_WEIGHT * (
                                prediction_b)
                        else:
                            predictions[id_b] += BERT_INTERSENTENCE_WEIGHT * (
                                prediction_b)
                else:
                    if prediction_a == prediction_b:
                        pass
                    elif prediction_a > prediction_b:
                        if 'gpt2' in k:
                            predictions[id_a] += GPT_INTRASENTENCE_WEIGHT * (
                                prediction_a)
                        else:
                            predictions[id_a] += BERT_INTRASENTENCE_WEIGHT * (
                                prediction_a)
                    else:
                        if 'gpt2' in k:
                            predictions[id_b] += GPT_INTRASENTENCE_WEIGHT * (
                                prediction_b)
                        else:
                            predictions[id_b] += BERT_INTRASENTENCE_WEIGHT * (
                                prediction_b)

    final_predictions = {"intersentence": [], "intrasentence": []}
    for k, v in predictions.items():
        d = {}
        d['id'] = k
        d['score'] = v
        if d['id'] in intersentence_ids:
            final_predictions['intersentence'].append(d)
        else:
            final_predictions['intrasentence'].append(d)

    print("Dumping results to", args.output_file)
    with open(args.output_file, "w+") as f:
        json.dump(final_predictions, f, indent=2)
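The long if/else ladder above applies four hand-tuned weights: each model casts a vote for whichever sentence in a pair it scores higher, and the vote is weighted by model family (gpt2 vs. bert) and by task (intersentence vs. intrasentence). A hypothetical helper, not part of the original script, that expresses the same lookup compactly:

def ensemble_weight(model_name, is_intersentence,
                    bert_inter=35.0, gpt_inter=15.0,
                    bert_intra=1.0, gpt_intra=10000.0):
    """Vote weight for one model's win on one sentence pair."""
    is_gpt = "gpt2" in model_name
    if is_intersentence:
        return gpt_inter if is_gpt else bert_inter
    return gpt_intra if is_gpt else bert_intra

# e.g.: predictions[id_a] += ensemble_weight(k, id_a in intersentence_ids) * prediction_a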
Example #7
def main(args):
    filename = args.input_file
    dataset = dataloader.StereoSet(filename)

    intrasentence_examples = dataset.get_intrasentence_examples()
    intersentence_examples = dataset.get_intersentence_examples()
    c = Counter()

    intrasentence = defaultdict(lambda: [])
    intrasentence_harm = {
        "neutral": 0,
        "stereotype": 0,
        "anti-stereotype": 0,
        "undecided": 0
    }

    terms = {
        "intersentence": defaultdict(lambda: set()),
        "intrasentence": defaultdict(lambda: set()),
        "overall": set()
    }
    cats = {
        "intersentence": defaultdict(lambda: 0),
        "intrasentence": defaultdict(lambda: 0),
        "overall": 0
    }
    domains_counter = Counter()

    for example in intrasentence_examples:
        terms['intrasentence'][example.bias_type].add(example.target)
        terms['overall'].add(example.target)
        c[example.bias_type] += 1
        cats['overall'] += 1
        cats['intrasentence'][example.bias_type] += 1
        for sentence in example.sentences:
            # intrasentence[sentence.gold_label].append(sentence.sentence)
            intrasentence[example.bias_type].append(sentence.sentence)
        intrasentence_harm[example.harm['gold_label']] += 1

    intersentence = defaultdict(lambda: [])
    intersentence_harm = {
        "neutral": 0,
        "stereotype": 0,
        "anti-stereotype": 0,
        "undecided": 0
    }

    for example in intersentence_examples:
        context = example.context
        terms['intersentence'][example.bias_type].add(example.target)
        terms['overall'].add(example.target)
        cats['intersentence'][example.bias_type] += 1
        cats['overall'] += 1
        c[example.bias_type] += 1
        for sentence in example.sentences:
            # intersentence[sentence.gold_label].append((context, sentence.sentence))
            intersentence[example.bias_type].append(
                (context, sentence.sentence))
        intersentence_harm[example.harm['gold_label']] += 1

    print("Intrasentence!")
    lengths = {"intersentence": [], "intrasentence": []}
    for k, v in intrasentence.items():
        avg_len = np.mean([len(i.split(" ")) for i in v])
        print(f"Average length of {k}: ", avg_len, "words")
        lengths['intrasentence'].append(avg_len)

        # with open(f"corpus/intrasentence_{k}.txt", "w+") as f:
        # f.write("\n".join(v))
    # print(intrasentence_harm)
    print(np.mean(lengths['intrasentence']))
    print()
    print("Intersentence!")
    for k, v in intersentence.items():
        avg_len = np.mean([len(" ".join(i).split(" ")) for i in v])
        print(f"Average length of {k}: ", avg_len, "words")
        lengths['intersentence'].append(avg_len)
        # with open(f"corpus/intersentence_{k}.txt", "w+") as f:
        # f.write("\n".join([f"{i[0]} {i[1]}" for i in v]))
    # print(intersentence_harm)
    print(np.mean(lengths['intersentence']))
    print("Overall Avg Length:",
          np.mean(lengths['intersentence'] + lengths['intrasentence']))
    print()

    total = sum(c.values())
    print(f"Total Examples: {total}")
    print(f"Number of total terms: {len(terms)}")
    for k, v in sorted(c.items(), key=lambda x: x[0]):
        print(f"{k}: {v}, {v / total}")
    print()

    print("------- TERMS ANALYSIS -------")
    for cat in ['intersentence', 'intrasentence']:
        total = 0
        for domain, s in terms[cat].items():
            print(f"{domain}: {len(s)}")
            total += len(s)
        print(f"{cat.capitalize()}: {total}")
        print()
    print("Overall total:", len(terms['overall']))
    print()

    print("------- TRIPLETS ANALYSIS -------")
    for cat in ['intersentence', 'intrasentence']:
        total = 0
        for domain, s in cats[cat].items():
            print(f"{domain}: {s}")
            total += s
        print(f"{cat.capitalize()}: {total}")
        print()
    print("Overall total:", cats['overall'])
    print()
Example #8
def main(args):
    MODEL_NAMES = [
        'bert-large-cased', 'xlnet-large-cased', 'roberta-base', 'gpt2-medium',
        'xlnet-base-cased', 'roberta-large', 'gpt2-large', 'bert-base-cased',
        'gpt2'
    ]
    sentence_ids = []  # a list of dicts mapping gold_label -> sentence, plus 'type' and 'target' keys

    gold_file = dataloader.StereoSet(args.gold_file)
    intrasentence_examples = gold_file.get_intrasentence_examples()
    intersentence_examples = gold_file.get_intersentence_examples()
    examples = intrasentence_examples + intersentence_examples
    target_counts = Counter()

    for example in examples:
        d = {}
        for sentence in example.sentences:
            d[sentence.gold_label] = sentence
        d['type'] = example.bias_type
        d['target'] = example.target
        target_counts[example.target] += 1
        sentence_ids.append(d)

    sent2score = defaultdict(lambda: dict())
    for predictions_file in glob(args.input_dir + "*.json"):
        idx = 2 if "_" in args.input_dir else 1
        model_name = predictions_file.split("_")[idx]
        with open(predictions_file, "r") as f:
            results = json.load(f)
        for result in results['intrasentence']:
            sent2score[result['id']][model_name] = result['score']
        for result in results['intersentence']:
            sent2score[result['id']][model_name] = result['score']

    count = 0.0
    domains = Counter()
    terms_per_domain = defaultdict(lambda: Counter())
    for sentence_pair in sentence_ids:
        l = []
        for model in MODEL_NAMES:
            # Pro-Stereotype Case
            if args.type == "pro" and (
                (sent2score[sentence_pair['stereotype'].ID][model] >
                 sent2score[sentence_pair['anti-stereotype'].ID][model]) and
                (sent2score[sentence_pair['stereotype'].ID][model] >
                 sent2score[sentence_pair['unrelated'].ID][model])):
                l.append(True)
            # anti-stereotype case
            elif args.type == "anti" and (
                (sent2score[sentence_pair['anti-stereotype'].ID][model] >
                 sent2score[sentence_pair['stereotype'].ID][model]) and
                (sent2score[sentence_pair['anti-stereotype'].ID][model] >
                 sent2score[sentence_pair['unrelated'].ID][model])):
                l.append(True)
            elif args.type == "unrelated" and (
                (sent2score[sentence_pair['unrelated'].ID][model] >
                 sent2score[sentence_pair['stereotype'].ID][model]) and
                (sent2score[sentence_pair['unrelated'].ID][model] >
                 sent2score[sentence_pair['anti-stereotype'].ID][model])):
                l.append(True)
            else:
                l.append(False)
        if all(l):
            for k, v in sentence_pair.items():
                if k in ["type", "target"]:
                    continue
                if args.domain_filter is None or args.domain_filter == sentence_pair['type']:
                    print(f"{k}: {v.sentence}, {v.ID}")
            print()
            count += 1.0
            domains[sentence_pair['type']] += 1
            terms_per_domain[sentence_pair['type']][
                sentence_pair['target']] += 1

    print(f"Number of clusters that models agree on: {count}")
    print("Breakdown by Domain:", domains)
    for domain in domains.keys():
        print(f"Domain: {domain}")
        terms = terms_per_domain[domain]
        normalized_terms = {}
        for k, v in terms.items():
            normalized_terms[k] = v / target_counts[k]
        normalized_terms = {
            k: v
            for k, v in sorted(normalized_terms.items(),
                               key=lambda item: item[1],
                               reverse=True)
        }
        print(normalized_terms)
        print()