def build(model, max_num_word_swaps=1):
    """Assemble a character-level typo attack against ``model``.

    Four character transformations are composed. Each one is configured to
    leave the first and last character of a word untouched, as in the paper.

    :param model: Handed to ``UntargetedClassification`` as the attack target.
    :param max_num_word_swaps: Upper bound on the number of perturbed words.
        Because the same word is never edited twice, this is effectively the
        maximum number of character changes (the paper looks at 1 and 2
        character attacks).
    :return: The configured ``Attack``.
    """
    # Keyword arguments shared by every character-level transformation.
    inner_chars_only = {
        "random_one": False,
        "skip_first_char": True,
        "skip_last_char": True,
    }
    typo_classes = (
        WordSwapNeighboringCharacterSwap,
        WordSwapRandomCharacterDeletion,
        WordSwapRandomCharacterInsertion,
        WordSwapQWERTY,
    )
    transformation = CompositeTransformation(
        [typo_cls(**inner_chars_only) for typo_cls in typo_classes]
    )

    # Only words of length >= 4 are edited, stopwords are skipped, at most
    # ``max_num_word_swaps`` words are perturbed and no word is edited twice.
    constraints = [
        MinWordLength(min_length=4),
        StopwordModification(),
        MaxWordsPerturbed(max_num_words=max_num_word_swaps),
        RepeatModification(),
    ]

    # Untargeted attack driven by a plain greedy search.
    goal_function = UntargetedClassification(model)
    search_method = GreedySearch()
    return Attack(goal_function, constraints, transformation, search_method)
def build(model):
    """Assemble the five-way "bug generation" attack against ``model``.

    The transformation is a composite of five bug generators (insert a space,
    delete a character, swap adjacent characters, homoglyph substitution and
    embedding-based word substitution). Candidates are constrained by a
    Universal Sentence Encoder threshold of 0.8 and searched greedily by word
    importance ranking with the "delete" method.

    :param model: Handed to ``UntargetedClassification`` as the attack target.
    :return: The configured ``Attack``.
    """
    # Options common to the three random character edits: pick one random
    # position and never touch the first or last character of the word.
    inner_random = {
        "random_one": True,
        "skip_first_char": True,
        "skip_last_char": True,
    }

    # (1) Insert: insert a space into the word. Words are generally segmented
    # by spaces in English, so inserting one can deceive classifiers.
    insert_space = WordSwapRandomCharacterInsertion(
        letters_to_insert=" ", **inner_random
    )
    # (2) Delete: delete a random character of the word except for the first
    # and the last character.
    delete_char = WordSwapRandomCharacterDeletion(**inner_random)
    # (3) Swap: swap two random adjacent letters in the word without altering
    # the first or last letter.
    swap_neighbors = WordSwapNeighboringCharacterSwap(**inner_random)
    # (4) Substitute-C (Sub-C): replace characters with visually similar
    # characters (e.g. "o" with "0", "l" with "1", "a" with "@") or adjacent
    # characters on the keyboard (e.g. "m" with "n").
    homoglyphs = WordSwapHomoglyphSwap()
    # (5) Substitute-W (Sub-W): replace a word with its top-k nearest
    # neighbours in a word vector space; the paper sets topk = 5.
    nearest_words = WordSwapEmbedding(max_candidates=5)

    transformation = CompositeTransformation(
        [insert_space, delete_char, swap_neighbors, homoglyphs, nearest_words]
    )

    # The paper encodes sentences with the Universal Sentence Encoder and
    # compares them by cosine similarity:
    # "Furthermore, the semantic similarity threshold \eps is set as 0.8 to
    # guarantee a good trade-off between quality and strength of the
    # generated adversarial text."
    constraints = [
        RepeatModification(),
        StopwordModification(),
        UniversalSentenceEncoder(threshold=0.8),
    ]

    # Goal is untargeted classification.
    goal_function = UntargetedClassification(model)

    # Greedily swap words with "Word Importance Ranking".
    search_method = GreedyWordSwapWIR(wir_method="delete")

    return Attack(goal_function, constraints, transformation, search_method)
def Checklist2020(model):
    """Build an invariance-testing attack in the style of CheckList.

    Based on "Beyond Accuracy: Behavioral Testing of NLP models with
    CheckList", Ribeiro et al., 2020 (https://arxiv.org/abs/2005.04118).

    The idea is to alter elements of the sentence without changing its
    meaning. In this version only contraction, name changes and location
    changes are active; extension and number changes are present but disabled,
    and negation is not implemented.

    :param model: Model to attack.
    :return: The configured ``Attack``.
    """
    active_swaps = [
        # WordSwapExtend(),  # disabled
        WordSwapContract(),
        WordSwapChangeName(),
        # WordSwapChangeNumber(),  # disabled
        WordSwapChangeLocation(),
    ]
    transformation = CompositeTransformation(active_swaps)

    # Needed to prevent extend and contract from modifying each other's
    # changes and forming an infinite loop.
    constraints = [RepeatModification()]

    # Untargeted attack with a plain greedy search.
    goal_function = UntargetedClassification(model)
    search_method = GreedySearch()

    return Attack(goal_function, constraints, transformation, search_method)
def __init__(self, model="distilroberta-base", tokenizer="distilroberta-base", **kwargs):
    """Configure a mask-then-infill augmenter (replace / insert / merge).

    One language model and one tokenizer are loaded and shared by the three
    masked-LM transformations. A Universal Sentence Encoder constraint with a
    cosine threshold of 0.7 is appended to ``DEFAULT_CONSTRAINTS``.

    :param model: Checkpoint name or path passed to
        ``AutoModelForMaskedLM.from_pretrained``.
    :param tokenizer: Checkpoint name or path passed to
        ``AutoTokenizer.from_pretrained``.
    :param kwargs: Forwarded unchanged to the parent ``__init__``.
    """
    import transformers

    from textattack.transformations import (
        CompositeTransformation,
        WordInsertionMaskedLM,
        WordMergeMaskedLM,
        WordSwapMaskedLM,
    )

    # The model is only ever used to fill in masked tokens, so load it through
    # the masked-LM auto class rather than the causal-LM one.
    # NOTE(review): the causal-LM auto class presumably also rejects
    # encoder-only checkpoints that ship no causal head — confirm.
    shared_masked_lm = transformers.AutoModelForMaskedLM.from_pretrained(model)
    shared_tokenizer = transformers.AutoTokenizer.from_pretrained(tokenizer)

    transformation = CompositeTransformation(
        [
            WordSwapMaskedLM(
                method="bae",
                masked_language_model=shared_masked_lm,
                tokenizer=shared_tokenizer,
                max_candidates=50,
                min_confidence=5e-4,
            ),
            WordInsertionMaskedLM(
                masked_language_model=shared_masked_lm,
                tokenizer=shared_tokenizer,
                max_candidates=50,
                min_confidence=0.0,
            ),
            WordMergeMaskedLM(
                masked_language_model=shared_masked_lm,
                tokenizer=shared_tokenizer,
                max_candidates=50,
                min_confidence=5e-3,
            ),
        ]
    )

    use_constraint = UniversalSentenceEncoder(
        threshold=0.7,
        metric="cosine",
        compare_against_original=True,
        window_size=15,
        skip_text_shorter_than_window=True,
    )

    # Concatenation builds a new list, so DEFAULT_CONSTRAINTS is not mutated.
    constraints = DEFAULT_CONSTRAINTS + [use_constraint]

    super().__init__(transformation, constraints=constraints, **kwargs)
def DeepWordBugGao2018(model, use_all_transformations=True):
    """Build the DeepWordBug character-level attack.

    Gao, Lanchantin, Soffa, Qi. Black-box Generation of Adversarial Text
    Sequences to Evade Deep Learning Classifiers.
    https://arxiv.org/abs/1801.04354

    :param model: Handed to ``UntargetedClassification`` as the attack target.
    :param use_all_transformations: When truthy, all four character edits are
        combined; otherwise only random character substitution is used.
    :return: The configured ``Attack``.
    """
    if not use_all_transformations:
        # The paper uses the Combined Score and the Substitution Transformer
        # to generate adversarial samples, with a maximum edit distance
        # difference of 30 (epsilon = 30).
        transformation = WordSwapRandomCharacterSubstitution()
    else:
        # Swap characters out from words, choosing among four similar methods.
        four_edits = [
            # (1) Swap: swap two adjacent letters in the word.
            WordSwapNeighboringCharacterSwap(),
            # (2) Substitution: substitute a letter with a random letter.
            WordSwapRandomCharacterSubstitution(),
            # (3) Deletion: delete a random letter from the word.
            WordSwapRandomCharacterDeletion(),
            # (4) Insertion: insert a random letter in the word.
            WordSwapRandomCharacterInsertion(),
        ]
        transformation = CompositeTransformation(four_edits)

    constraints = [
        # Don't modify the same word twice, and don't modify stopwords.
        RepeatModification(),
        StopwordModification(),
        # The maximum edit distance (epsilon) is held at a constant 30 for
        # each sample.
        LevenshteinEditDistance(30),
    ]

    # Goal is untargeted classification.
    goal_function = UntargetedClassification(model)

    # Greedily swap words with "Word Importance Ranking".
    search_method = GreedyWordSwapWIR()

    return Attack(goal_function, constraints, transformation, search_method)
def Pruthi2019(model, max_num_word_swaps=1):
    """Build the typo attack from Pruthi et al., 2019.

    An implementation of the attack used in "Combating Adversarial
    Misspellings with Robust Word Recognition"
    (https://arxiv.org/abs/1905.11268). It applies a small number of
    character-level changes that simulate common typos:

    - swapping neighboring characters
    - deleting characters
    - inserting characters
    - swapping characters for adjacent keys on a QWERTY keyboard

    :param model: Model to attack.
    :param max_num_word_swaps: Maximum number of modifications to allow.
    :return: The configured ``Attack``.
    """
    # Each transformation ignores the first and last letter of every word, as
    # in the paper.
    neighbor_swap = WordSwapNeighboringCharacterSwap(
        random_one=False, skip_first_char=True, skip_last_char=True
    )
    char_deletion = WordSwapRandomCharacterDeletion(
        random_one=False, skip_first_char=True, skip_last_char=True
    )
    char_insertion = WordSwapRandomCharacterInsertion(
        random_one=False, skip_first_char=True, skip_last_char=True
    )
    qwerty_swap = WordSwapQWERTY(
        random_one=False, skip_first_char=True, skip_last_char=True
    )
    transformation = CompositeTransformation(
        [neighbor_swap, char_deletion, char_insertion, qwerty_swap]
    )

    # Only words of length >= 4 are edited and at most max_num_word_swaps
    # words are touched. Since the same word is never edited twice,
    # max_num_word_swaps is really the maximum number of character changes.
    # The paper looks at 1 and 2 character attacks.
    constraints = [
        MinWordLength(min_length=4),
        StopwordModification(),
        MaxWordsPerturbed(max_num_words=max_num_word_swaps),
        RepeatModification(),
    ]

    # Untargeted attack with a plain greedy search.
    goal_function = UntargetedClassification(model)
    search_method = GreedySearch()

    return Attack(goal_function, constraints, transformation, search_method)
def __init__(self, **kwargs):
    """Configure an augmenter that applies four character-level edits.

    The composite transformation combines adjacent-letter swap, random
    substitution, random deletion and random insertion, and is used together
    with ``DEFAULT_CONSTRAINTS``.

    :param kwargs: Forwarded unchanged to the parent ``__init__``.
    """
    # Single parenthesised import; the original listed
    # WordSwapNeighboringCharacterSwap twice.
    from textattack.transformations import (
        CompositeTransformation,
        WordSwapNeighboringCharacterSwap,
        WordSwapRandomCharacterDeletion,
        WordSwapRandomCharacterInsertion,
        WordSwapRandomCharacterSubstitution,
    )

    transformation = CompositeTransformation(
        [
            # (1) Swap: swap two adjacent letters in the word.
            WordSwapNeighboringCharacterSwap(),
            # (2) Substitution: substitute a letter with a random letter.
            WordSwapRandomCharacterSubstitution(),
            # (3) Deletion: delete a random letter from the word.
            WordSwapRandomCharacterDeletion(),
            # (4) Insertion: insert a random letter in the word.
            WordSwapRandomCharacterInsertion(),
        ]
    )
    super().__init__(transformation, constraints=DEFAULT_CONSTRAINTS, **kwargs)
def build(model):
    """Assemble an invariance-style attack from five word-swap transformations.

    :param model: Handed to ``UntargetedClassification`` as the attack target.
    :return: The configured ``Attack``.
    """
    swap_classes = (
        WordSwapExtend,
        WordSwapContract,
        WordSwapChangeName,
        WordSwapChangeNumber,
        WordSwapChangeLocation,
    )
    transformation = CompositeTransformation(
        [swap_cls() for swap_cls in swap_classes]
    )

    # Needed to prevent extend and contract from modifying each other's
    # changes and forming an infinite loop.
    constraints = [RepeatModification()]

    # Untargeted attack with a plain greedy search.
    goal_function = UntargetedClassification(model)
    search_method = GreedySearch()

    return Attack(goal_function, constraints, transformation, search_method)
def build(model, use_all_transformations=True, ensemble: bool = False):
    """Assemble a character-level attack searched by word importance ranking.

    :param model: Handed to ``UntargetedClassification`` as the attack target.
    :param use_all_transformations: When truthy, four character edits are
        combined; otherwise only random character substitution is used.
    :param ensemble: Forwarded as ``ensemble=`` to ``GreedyWordSwapWIR``.
    :return: The configured ``Attack``.
    """
    if not use_all_transformations:
        # The paper uses the Combined Score and the Substitution Transformer
        # to generate adversarial samples, with a maximum edit distance
        # difference of 30 (epsilon = 30).
        transformation = WordSwapRandomCharacterSubstitution()
    else:
        # Swap characters out from words, choosing among four similar methods.
        edit_classes = (
            # (1) Swap: swap two adjacent letters in the word.
            WordSwapNeighboringCharacterSwap,
            # (2) Substitution: substitute a letter with a random letter.
            WordSwapRandomCharacterSubstitution,
            # (3) Deletion: delete a random letter from the word.
            WordSwapRandomCharacterDeletion,
            # (4) Insertion: insert a random letter in the word.
            WordSwapRandomCharacterInsertion,
        )
        transformation = CompositeTransformation(
            [edit_cls() for edit_cls in edit_classes]
        )

    # Don't modify the same word twice, and don't modify stopwords.
    constraints = [RepeatModification(), StopwordModification()]
    # The maximum edit distance (epsilon) is held at a constant 30 per sample.
    max_edit_distance = LevenshteinEditDistance(30)
    constraints.append(max_edit_distance)

    # Goal is untargeted classification.
    goal_function = UntargetedClassification(model)

    # Greedily swap words with "Word Importance Ranking".
    search_method = GreedyWordSwapWIR(ensemble=ensemble)

    return Attack(goal_function, constraints, transformation, search_method)
def __init__(self, **kwargs):
    """Configure an augmenter built from five invariance-style word swaps.

    Only the first entry of ``DEFAULT_CONSTRAINTS`` is applied.

    :param kwargs: Forwarded unchanged to the parent ``__init__``.
    """
    from textattack.transformations import (
        CompositeTransformation,
        WordSwapChangeLocation,
        WordSwapChangeName,
        WordSwapChangeNumber,
        WordSwapContract,
        WordSwapExtend,
    )

    # Order matters: it is the order handed to CompositeTransformation.
    swap_classes = (
        WordSwapChangeNumber,
        WordSwapChangeLocation,
        WordSwapChangeName,
        WordSwapExtend,
        WordSwapContract,
    )
    transformation = CompositeTransformation(
        [swap_cls() for swap_cls in swap_classes]
    )

    # NOTE(review): relies on the ordering of DEFAULT_CONSTRAINTS; presumably
    # the first entry is the repeat-modification constraint — confirm.
    first_default = DEFAULT_CONSTRAINTS[0]
    constraints = [first_default]

    super().__init__(transformation, constraints=constraints, **kwargs)
def DeepWordBugGao2018(model, use_all_transformations=True):
    """Build a character-level attack bounded by Levenshtein distance 30.

    In this variant the search object itself is the attack: the configured
    ``GreedyWordSwapWIR`` instance is returned.

    :param model: Handed to ``UntargetedClassification`` as the attack target.
    :param use_all_transformations: When truthy, four character edits are
        combined; otherwise only random character substitution is used.
    :return: The configured ``GreedyWordSwapWIR`` instance.
    """
    if not use_all_transformations:
        # The paper uses the Combined Score and the Substitution Transformer
        # to generate adversarial samples, with a maximum edit distance
        # difference of 30 (epsilon = 30).
        transformation = WordSwapRandomCharacterSubstitution()
    else:
        # Swap characters out from words, choosing among four similar methods.
        four_edits = [
            # (1) Swap: swap two adjacent letters in the word.
            WordSwapNeighboringCharacterSwap(),
            # (2) Substitution: substitute a letter with a random letter.
            WordSwapRandomCharacterSubstitution(),
            # (3) Deletion: delete a random letter from the word.
            WordSwapRandomCharacterDeletion(),
            # (4) Insertion: insert a random letter in the word.
            WordSwapRandomCharacterInsertion(),
        ]
        transformation = CompositeTransformation(four_edits)

    # The maximum edit distance (epsilon) is held at a constant 30 per sample.
    constraints = [LevenshteinEditDistance(30)]

    # Goal is untargeted classification.
    goal_function = UntargetedClassification(model)

    # Greedily swap words with "Word Importance Ranking".
    return GreedyWordSwapWIR(
        goal_function,
        transformation=transformation,
        constraints=constraints,
        max_depth=None,
    )
def build(model):
    """Assemble the CLARE mask-then-infill attack against ``model``.

    From the paper: "This paper presents CLARE, a ContextuaLized AdversaRial
    Example generation model that produces fluent and grammatical outputs
    through a mask-then-infill procedure. CLARE builds on a pre-trained masked
    language model and modifies the inputs in a context-aware manner. We
    propose three contextualized perturbations, Replace, Insert and Merge,
    allowing for generating outputs of varied lengths."

    :param model: Handed to ``UntargetedClassification`` as the attack target.
    :return: The configured ``Attack``.
    """
    # "We experiment with a distilled version of RoBERTa (RoBERTa_{distill};
    # Sanh et al., 2019) as the masked language model for contextualized
    # infilling."
    # The model is only used to fill in masked tokens, so it is loaded through
    # the masked-LM auto class rather than the causal-LM one.
    shared_masked_lm = transformers.AutoModelForMaskedLM.from_pretrained(
        "distilroberta-base"
    )
    shared_tokenizer = transformers.AutoTokenizer.from_pretrained(
        "distilroberta-base"
    )

    # Because BAE and CLARE both use similar replacement methods, BAE's
    # replacement method is used here.
    transformation = CompositeTransformation(
        [
            WordSwapMaskedLM(
                method="bae",
                masked_language_model=shared_masked_lm,
                tokenizer=shared_tokenizer,
                max_candidates=50,
                min_confidence=5e-4,
            ),
            WordInsertionMaskedLM(
                masked_language_model=shared_masked_lm,
                tokenizer=shared_tokenizer,
                max_candidates=50,
                min_confidence=0.0,
            ),
            WordMergeMaskedLM(
                masked_language_model=shared_masked_lm,
                tokenizer=shared_tokenizer,
                max_candidates=50,
                min_confidence=5e-3,
            ),
        ]
    )

    # Don't modify the same word twice or stopwords.
    constraints = [RepeatModification(), StopwordModification()]

    # "A common choice of sim(.,.) is to encode sentences using neural
    # networks, and calculate their cosine similarity in the embedding space
    # (Jin et al., 2020)."
    # The original implementation uses a similarity of 0.7.
    use_constraint = UniversalSentenceEncoder(
        threshold=0.7,
        metric="cosine",
        compare_against_original=True,
        window_size=15,
        skip_text_shorter_than_window=True,
    )
    constraints.append(use_constraint)

    # Goal is untargeted classification.
    # "The score is then the negative probability of predicting the gold label
    # from f, using [x_{adv}] as the input"
    goal_function = UntargetedClassification(model)

    # "To achieve this, we iteratively apply the actions, and first select
    # those minimizing the probability of outputting the gold label y from f."
    #
    # "Only one of the three actions can be applied at each position, and we
    # select the one with the highest score."
    #
    # "Actions are iteratively applied to the input, until an adversarial
    # example is found or a limit of actions T is reached. Each step selects
    # the highest-scoring action from the remaining ones."
    search_method = GreedySearch()

    return Attack(goal_function, constraints, transformation, search_method)
def attack_from_queue(args, in_queue, out_queue):
    """Worker loop: build a masked-LM attack and run it on queued samples.

    The worker derives a GPU id from its process identity, loads the victim
    sequence classifier and a shared Chinese masked language model, assembles
    the attack, then processes ``(i, text, output)`` items from ``in_queue``
    until the queue is observed to be empty.

    :param args: Only ``args.random_seed`` is read here.
    :param in_queue: Queue of ``(i, text, output)`` tuples to attack.
    :param out_queue: Receives ``(i, result)`` for each attacked sample, or
        the raised exception object if processing fails (after which the
        worker exits).
    """
    import queue

    # How long a single get() may wait before the loop re-checks emptiness.
    queue_poll_seconds = 5

    # NOTE(review): relies on the private ``_identity`` attribute; the ``- 2``
    # offset presumably maps the first worker to GPU 0 — confirm.
    gpu_id = torch.multiprocessing.current_process()._identity[0] - 2
    set_env_variables(gpu_id)

    # Tokenizer for the victim model.
    config = BertConfig.from_pretrained("hfl/chinese-macbert-base")
    config.output_attentions = False
    config.output_token_type_ids = False
    tokenizer = BertTokenizer.from_pretrained("hfl/chinese-macbert-base", config=config)

    # Victim model: a 3-label sequence classifier loaded from a local path.
    config = AutoConfig.from_pretrained(
        './models/roberta/chinese-roberta-wwm-ext-OCNLI-2021-01-05-23-46-02-975289',
        num_labels=3,
    )
    model = AutoModelForSequenceClassification.from_pretrained(
        './models/roberta/chinese-roberta-wwm-ext-OCNLI-2021-01-05-23-46-02-975289',
        config=config,
    )
    model_wrapper = HuggingFaceModelWrapper(model, tokenizer, batch_size=24)

    # Masked language model and tokenizer shared by all three transformations.
    shared_masked_lm = AutoModelForMaskedLM.from_pretrained("bert-base-chinese")
    shared_tokenizer = BertTokenizer.from_pretrained("bert-base-chinese")
    transformation = CompositeTransformation(
        [
            WordSwapMaskedLM(
                method="bae",
                masked_language_model=shared_masked_lm,
                tokenizer=shared_tokenizer,
                max_candidates=5,
                min_confidence=5e-4,
            ),
            WordInsertionMaskedLM(
                masked_language_model=shared_masked_lm,
                tokenizer=shared_tokenizer,
                max_candidates=5,
                min_confidence=0.0,
            ),
            WordMergeMaskedLM(
                masked_language_model=shared_masked_lm,
                tokenizer=shared_tokenizer,
                max_candidates=5,
                min_confidence=5e-3,
            ),
        ]
    )

    # Goal function.
    goal_function = UntargetedClassification(model_wrapper)

    # NOTE(review): this mixed Chinese/English stopword set is built but never
    # passed to StopwordModification() below, so that constraint runs with its
    # own defaults. It looks like it was meant to be supplied — confirm intent
    # before wiring it in, since doing so changes attack behaviour.
    stopwords = set(
        ["个", "关于", "之上", "across", "之后", "afterwards", "再次", "against", "ain", "全部", "几乎", "单独", "along",
         "早已", "也", "虽然", "是", "among", "amongst", "一个", "和", "其他", "任何", "anyhow", "任何人", "anything",
         "anyway", "anywhere", "are", "aren", "没有", "around", "as", "at", "后", "been", "之前", "beforehand",
         "behind", "being", "below", "beside", "besides", "之間", "beyond", "皆是", "但", "by", "可以", "不可以",
         "是", "不是", "couldn't",
         "d", "didn", "didn't", "doesn", "doesn't", "don", "don't", "down", "due", "之間", "either", "之外",
         "elsewhere", "空", "足夠", "甚至", "ever", "任何人", "everything", "everywhere", "except", "first", "for",
         "former", "formerly", "from", "hadn", "hadn't", "hasn", "hasn't", "haven", "haven't", "he", "hence",
         "her", "here", "hereafter", "hereby", "herein", "hereupon", "hers", "herself", "him", "himself", "his",
         "how", "however", "hundred", "i", "if", "in", "indeed", "into", "is", "isn", "isn't", "it", "it's",
         "its", "itself", "just", "latter", "latterly", "least", "ll", "may", "me", "meanwhile", "mightn",
         "mightn't", "mine", "more", "moreover", "most", "mostly", "must", "mustn", "mustn't", "my", "myself",
         "namely", "needn", "needn't", "neither", "never", "nevertheless", "next", "no", "nobody", "none",
         "noone", "nor", "not", "nothing", "now", "nowhere", "o", "of", "off", "on", "once", "one", "only",
         "onto", "or", "other", "others", "otherwise", "our", "ours", "ourselves", "out", "over", "per",
         "please", "s", "same", "shan", "shan't", "she", "she's", "should've", "shouldn", "shouldn't",
         "somehow", "something", "sometime", "somewhere", "such", "t", "than", "that", "that'll", "the",
         "their", "theirs", "them", "themselves", "then", "thence", "there", "thereafter", "thereby",
         "therefore", "therein", "thereupon", "these", "they", "this", "those", "through", "throughout",
         "thru", "thus", "to", "too", "toward", "towards", "under", "unless", "until", "up", "upon", "used",
         "ve", "was", "wasn", "wasn't", "we", "were", "weren", "weren't", "what", "whatever", "when", "whence",
         "whenever", "where", "whereafter", "whereas", "whereby", "wherein", "whereupon", "wherever",
         "whether", "which", "while", "whither", "who", "whoever", "whole", "whom", "whose", "why", "with",
         "within", "without", "won", "won't", "would", "wouldn", "wouldn't", "y", "yet", "you", "you'd",
         "you'll", "you're", "you've", "your", "yours", "yourself", "yourselves"]
    )

    # Constraints: no repeated or stopword modifications, plus a sentence
    # encoder similarity threshold.
    constraints = [RepeatModification(), StopwordModification()]
    use_constraint = UniversalSentenceEncoder(
        threshold=0.7,
        metric="cosine",
        compare_against_original=True,
        window_size=15,
        skip_text_shorter_than_window=True,
    )
    constraints.append(use_constraint)

    # Search method.
    search_method = GreedySearch()

    textattack.shared.utils.set_seed(args.random_seed)
    attack = Attack(goal_function, constraints, transformation, search_method)

    # Print the attack configuration once, from the worker with GPU id 0.
    if gpu_id == 0:
        print(attack, "\n")

    while not in_queue.empty():
        try:
            # A sibling worker may take the last item between the empty()
            # check and this call; an unbounded get() would then block
            # forever, so wait a bounded time and re-check instead.
            i, text, output = in_queue.get(timeout=queue_poll_seconds)
            results_gen = attack.attack_dataset([(text, output)])
            result = next(results_gen)
            out_queue.put((i, result))
        except queue.Empty:
            continue
        except Exception as e:
            # Hand the failure to the consumer of out_queue, then stop this
            # worker.
            out_queue.put(e)
            exit()