示例#1
0
def main(args):
    """Train a classifier on the selected corpus and report the best score.

    Reads the train/test split for ``args.dataset`` from hard-coded corpus
    locations, builds a CUDA ``Model``, and calls ``train_model`` once per
    epoch for ``args.max_epoch`` epochs, threading the running ``best_test``
    value through each call. The final value is written to stdout under the
    label ``test_err``.

    Args:
        args: namespace providing ``dataset``, ``embedding``, ``d``, ``depth``,
            ``dropout``, ``cnn``, ``lr``, ``batch_size``, ``max_epoch``,
            ``lr_decay`` and ``save_path``.
    """
    dataset = args.dataset
    if dataset == 'mr':
        # The MR corpus is read with read_corpus' default options.
        train_x, train_y = dataloader.read_corpus(
            '/data/medg/misc/jindi/nlp/datasets/mr/train.txt')
        test_x, test_y = dataloader.read_corpus(
            '/data/medg/misc/jindi/nlp/datasets/mr/test.txt')
    else:
        if dataset == 'imdb':
            corpus_dir = '/data/medg/misc/jindi/nlp/datasets/imdb'
            train_path = os.path.join(corpus_dir, 'train_tok.csv')
            test_path = os.path.join(corpus_dir, 'test_tok.csv')
            mr_flag = True
        else:
            # Any other dataset name is substituted into a shared directory
            # template.
            template = '/afs/csail.mit.edu/u/z/zhijing/proj/to_di/data/{}/{}'
            train_path = template.format(dataset, 'train_tok.csv')
            test_path = template.format(dataset, 'test_tok.csv')
            mr_flag = False
        train_x, train_y = dataloader.read_corpus(train_path,
                                                  clean=False,
                                                  MR=mr_flag,
                                                  shuffle=True)
        test_x, test_y = dataloader.read_corpus(test_path,
                                                clean=False,
                                                MR=mr_flag,
                                                shuffle=True)

    # Labels are used as class indices, so the class count is max label + 1.
    nclasses = max(train_y) + 1

    model = Model(args.embedding, args.d, args.depth, args.dropout, args.cnn,
                  nclasses).cuda()
    # Only parameters that require gradients are handed to the optimizer.
    trainable = (p for p in model.parameters() if p.requires_grad)
    optimizer = optim.Adam(trainable, lr=args.lr)

    train_x, train_y = dataloader.create_batches(train_x, train_y,
                                                 args.batch_size,
                                                 model.word2id)
    test_x, test_y = dataloader.create_batches(test_x, test_y,
                                               args.batch_size,
                                               model.word2id)

    best_test = 0
    for epoch in range(args.max_epoch):
        best_test = train_model(epoch, model, optimizer, train_x, train_y,
                                test_x, test_y, best_test, args.save_path)
        # A positive lr_decay multiplies the first param group's learning
        # rate after every epoch.
        if args.lr_decay > 0:
            optimizer.param_groups[0]['lr'] *= args.lr_decay

    sys.stdout.write("test_err: {:.6f}\n".format(best_test))
def main():
    """Command-line entry point for the contextual (masked-LM) attack.

    Parses the CLI options, loads up to ``--data_size`` samples from
    ``--dataset_path``, builds the target classifier and a
    ``Contextual_synonyms_BERT`` masked LM, prepares the counter-fitted
    vocabulary and cosine-similarity matrix, then attacks every sample with
    ``contextual_attack`` (or ``random_attack`` when ``--perturb_ratio`` is
    positive).

    Side effects:
        * prints progress and the per-sample original/adversarial text;
        * appends a one-line summary to ``<output_dir>/results_log``;
        * overwrites ``<output_dir>/adversaries.txt`` with the successful
          adversarial examples.
    """
    parser = argparse.ArgumentParser()

    ## Required parameters
    parser.add_argument("--dataset_path",
                        type=str,
                        required=True,
                        help="Which dataset to attack.")
    parser.add_argument("--nclasses",
                        type=int,
                        default=2,
                        help="How many classes for classification.")
    parser.add_argument("--target_model",
                        type=str,
                        required=True,
                        choices=['wordLSTM', 'bert', 'wordCNN'],
                        help="Target models for text classification: fasttext, charcnn, word level lstm "
                             "For NLI: InferSent, ESIM, bert-base-uncased")
    parser.add_argument("--target_model_path",
                        type=str,
                        required=True,
                        help="pre-trained target model path")
    parser.add_argument("--word_embeddings_path",
                        type=str,
                        default='',
                        help="path to the word embeddings for the target model")
    parser.add_argument("--counter_fitting_embeddings_path",
                        type=str,
                        required=True,
                        help="path to the counter-fitting embeddings we used to find synonyms")
    parser.add_argument("--counter_fitting_cos_sim_path",
                        type=str,
                        default='',
                        help="pre-compute the cosine similarity scores based on the counter-fitting embeddings")
    parser.add_argument("--USE_cache_path",
                        type=str,
                        required=True,
                        help="Path to the USE encoder cache.")
    parser.add_argument("--output_dir",
                        type=str,
                        default='adv_results',
                        help="The output directory where the attack results will be written.")

    ## Model hyperparameters
    parser.add_argument("--sim_score_window",
                        default=15,
                        type=int,
                        help="Text length or token number to compute the semantic similarity score")
    parser.add_argument("--import_score_threshold",
                        default=-1.,
                        type=float,
                        help="Required mininum importance score.")
    parser.add_argument("--sim_score_threshold",
                        default=0.85,
                        type=float,
                        help="Required minimum semantic similarity score.")
    parser.add_argument("--synonym_num",
                        default=50,
                        type=int,
                        help="Number of synonyms to extract")
    parser.add_argument("--batch_size",
                        default=32,
                        type=int,
                        help="Batch size to get prediction")
    parser.add_argument("--data_size",
                        default=100,
                        type=int,
                        help="Data size to create adversaries")
    parser.add_argument("--perturb_ratio",
                        default=0.,
                        type=float,
                        help="Whether use random perturbation for ablation study")
    parser.add_argument("--max_seq_length",
                        default=128,
                        type=int,
                        help="max sequence length for BERT target model")

    args = parser.parse_args()

    # A non-empty output directory is only reported, not treated as an error.
    if os.path.exists(args.output_dir) and os.listdir(args.output_dir):
        print("Output directory ({}) already exists and is not empty.".format(args.output_dir))
    else:
        os.makedirs(args.output_dir, exist_ok=True)

    # get data to attack
    texts, labels = dataloader.read_corpus(args.dataset_path)
    data = list(zip(texts, labels))
    data = data[:args.data_size]  # choose how many samples for adversary
    print("Data import finished!")

    # construct the model
    print("Building Model...")
    if args.target_model == 'wordLSTM':
        model = Model(args.word_embeddings_path, nclasses=args.nclasses).cuda()
        checkpoint = torch.load(args.target_model_path, map_location='cuda:0')
        model.load_state_dict(checkpoint)
    elif args.target_model == 'wordCNN':
        model = Model(args.word_embeddings_path, nclasses=args.nclasses, hidden_size=100, cnn=True).cuda()
        checkpoint = torch.load(args.target_model_path, map_location='cuda:0')
        model.load_state_dict(checkpoint)
    elif args.target_model == 'bert':
        model = NLI_infer_BERT(args.target_model_path, nclasses=args.nclasses, max_seq_length=args.max_seq_length)
    predictor = model.text_pred
    print("Model built!")

    # The masked LM is loaded from the same path as the target model; the
    # object itself (not a bound method) is handed to the attack.
    maskedLM = Contextual_synonyms_BERT(args.target_model_path, max_seq_length=args.max_seq_length)
    maskedLM_predictor = maskedLM
    print("Masked LM BERT built!")

    # prepare synonym extractor
    # build dictionary via the embedding file
    idx2word = {}
    word2idx = {}

    print("Building vocab...")
    with open(args.counter_fitting_embeddings_path, 'r') as ifile:
        for line in ifile:
            word = line.split()[0]
            # idx2word is keyed by integer index, so this membership test on a
            # string token is always true and every line gets its own index.
            # NOTE(review): possibly meant `word2idx`; changing it would shift
            # indices whenever the file repeats a token -- confirm against how
            # the cos_sim rows are indexed before touching it.
            if word not in idx2word:
                idx2word[len(idx2word)] = word
                word2idx[word] = len(idx2word) - 1

    print("Building cos sim matrix...")
    if args.counter_fitting_cos_sim_path:
        # load pre-computed cosine similarity matrix if provided
        print('Load pre-computed cosine similarity matrix from {}'.format(args.counter_fitting_cos_sim_path))
        cos_sim = np.load(args.counter_fitting_cos_sim_path)
    else:
        # calculate the cosine similarity matrix: one row per embedding line
        print('Start computing the cosine similarity matrix!')
        embeddings = []
        with open(args.counter_fitting_embeddings_path, 'r') as ifile:
            for line in ifile:
                embedding = [float(num) for num in line.strip().split()[1:]]
                embeddings.append(embedding)
        embeddings = np.array(embeddings)
        product = np.dot(embeddings, embeddings.T)
        norm = np.linalg.norm(embeddings, axis=1, keepdims=True)
        cos_sim = product / np.dot(norm, norm.T)
    print("Cos sim import finished!")

    # build the semantic similarity module
    use = USE(args.USE_cache_path)

    # start attacking
    orig_failures = 0.
    adv_failures = 0.
    changed_rates = []
    nums_queries = []
    orig_texts = []
    adv_texts = []
    true_labels = []
    new_labels = []

    stop_words_set = criteria.get_stopwords()
    print('Start attacking!')
    for idx, (text, true_label) in enumerate(data):
        if idx % 2 == 0:
            print('{} samples out of {} have been finished!'.format(idx, args.data_size))
        if args.perturb_ratio > 0.:
            new_text, num_changed, orig_label, \
            new_label, num_queries = random_attack(text, true_label, predictor, args.perturb_ratio, stop_words_set,
                                                    word2idx, idx2word, cos_sim, sim_predictor=use,
                                                    sim_score_threshold=args.sim_score_threshold,
                                                    import_score_threshold=args.import_score_threshold,
                                                    sim_score_window=args.sim_score_window,
                                                    synonym_num=args.synonym_num,
                                                    batch_size=args.batch_size)
        else:
            new_text, num_changed, orig_label, \
            new_label, num_queries = contextual_attack(text, true_label, predictor, maskedLM_predictor, stop_words_set,
                                            word2idx, idx2word, cos_sim, sim_predictor=use,
                                            sim_score_threshold=args.sim_score_threshold,
                                            import_score_threshold=args.import_score_threshold,
                                            sim_score_window=args.sim_score_window,
                                            synonym_num=args.synonym_num,
                                            batch_size=args.batch_size)

        # Query counts are only collected for samples the model originally
        # classified correctly.
        if true_label != orig_label:
            orig_failures += 1
        else:
            nums_queries.append(num_queries)
        if true_label != new_label:
            adv_failures += 1
        print("OLD TEXT: " + ' '.join(text))
        print("NEW TEXT: " + new_text)
        print("NUM CHANGED: " + str(num_changed))
        sys.stdout.flush()
        changed_rate = 1.0 * num_changed / len(text)

        # A successful attack flips a prediction that was originally correct.
        if true_label == orig_label and true_label != new_label:
            changed_rates.append(changed_rate)
            orig_texts.append(' '.join(text))
            adv_texts.append(new_text)
            true_labels.append(true_label)
            new_labels.append(new_label)

    # Normalise by the number of samples actually attacked instead of a fixed
    # 1000; max(..., 1) keeps an empty dataset from dividing by zero.
    num_samples = max(len(data), 1)
    message = 'For target model {}: original accuracy: {:.3f}%, adv accuracy: {:.3f}%, ' \
              'avg changed rate: {:.3f}%, num of queries: {:.1f}\n'.format(args.target_model,
                                                                     (1 - orig_failures / num_samples) * 100,
                                                                     (1 - adv_failures / num_samples) * 100,
                                                                     np.mean(changed_rates) * 100,
                                                                     np.mean(nums_queries))
    print(message)
    # Append mode keeps summaries from earlier runs; the context manager
    # guarantees the handle is flushed and closed.
    with open(os.path.join(args.output_dir, 'results_log'), 'a') as log_file:
        log_file.write(message)

    with open(os.path.join(args.output_dir, 'adversaries.txt'), 'w') as ofile:
        for orig_text, adv_text, true_label, new_label in zip(orig_texts, adv_texts, true_labels, new_labels):
            ofile.write('orig sent ({}):\t{}\nadv sent ({}):\t{}\n\n'.format(true_label, orig_text, new_label, adv_text))
def main():
    """Attack the target model on a corpus and dump the adversarial samples.

    Configuration is read from an ``args`` object that is not created in this
    function, so it has to be provided by an enclosing (module) scope. Up to
    ``args.data_size`` samples are loaded from ``args.dataset_path``, the
    target classifier is built, and each sample is attacked with ``attack``
    (or ``random_attack`` when ``args.perturb_ratio`` is positive).

    Side effects:
        * prints per-sample labels, texts and running success counts;
        * appends a one-line summary to ``<output_dir>/results_log``;
        * writes the collected adversarial data with ``joblib.dump`` to
          ``<output_dir>/<args.save>``.
    """
    # A non-empty output directory is only reported, not treated as an error.
    if os.path.exists(args.output_dir) and os.listdir(args.output_dir):
        print("Output directory ({}) already exists and is not empty.".format(
            args.output_dir))
    else:
        os.makedirs(args.output_dir, exist_ok=True)

    # get data to attack
    texts, labels = dataloader.read_corpus(args.dataset_path)
    data = list(zip(texts, labels))
    data = data[:args.data_size]  # choose how many samples for adversary
    print("Data import finished!")

    # construct the model
    print("Building Model...")
    if args.target_model == 'wordLSTM':
        model = Model(args.word_embeddings_path, nclasses=args.nclasses).cuda()
        checkpoint = torch.load(args.target_model_path, map_location='cuda:0')
        model.load_state_dict(checkpoint)
    elif args.target_model == 'wordCNN':
        model = Model(args.word_embeddings_path,
                      nclasses=args.nclasses,
                      hidden_size=100,
                      cnn=True).cuda()
        checkpoint = torch.load(args.target_model_path, map_location='cuda:0')
        model.load_state_dict(checkpoint)
    elif args.target_model == 'bert':
        model = NLI_infer_BERT(args.target_model_path,
                               nclasses=args.nclasses,
                               max_seq_length=args.max_seq_length)
    predictor = model.text_pred
    print("Model built!")

    # Vocabulary, cosine-similarity matrix and the USE similarity module are
    # not built in this variant: both dictionaries stay empty and no sentence
    # encoder is passed on.
    idx2word = {}
    word2idx = {}
    use = None

    # start attacking
    orig_failures = 0.
    adv_failures = 0.
    changed_rates = []
    nums_queries = []
    orig_texts = []
    adv_texts = []
    true_labels = []
    new_labels = []

    stop_words_set = criteria.get_stopwords()
    print('Start attacking!')
    for idx, (text, true_label) in enumerate(data):
        if idx % 20 == 0:
            print('{} samples out of {} have been finished!'.format(
                idx, args.data_size))
        if args.perturb_ratio > 0.:
            # NOTE(review): `cos_sim` is never assigned inside this function,
            # so this branch only works if an outer scope defines it -- confirm
            # before relying on --perturb_ratio here.
            new_text, num_changed, orig_label, \
            new_label, num_queries = random_attack(text, true_label, predictor, args.perturb_ratio, stop_words_set,
                                                    word2idx, idx2word, cos_sim, sim_predictor=use,
                                                    sim_score_threshold=args.sim_score_threshold,
                                                    import_score_threshold=args.import_score_threshold,
                                                    sim_score_window=args.sim_score_window,
                                                    synonym_num=args.synonym_num,
                                                    batch_size=args.batch_size)
        else:
            new_text, num_changed, orig_label, \
            new_label, num_queries = attack(text, true_label, predictor, model, batch_size=args.batch_size)

        print(true_label, orig_label, new_label)
        print("orig texts:", text)
        # Query counts are only collected for samples the model originally
        # classified correctly.
        if true_label != orig_label:
            orig_failures += 1
            print("orig failure")
        else:
            nums_queries.append(num_queries)

        # Here "failure" is measured against the model's own original
        # prediction rather than the gold label.
        attack_succeeded = orig_label != new_label
        if attack_succeeded:
            adv_failures += 1
            print(
                f"attack successful: {adv_failures}/{idx + 1}={adv_failures / (idx + 1)}"
            )

        changed_rate = 1.0 * num_changed / len(text)

        if attack_succeeded:
            changed_rates.append(changed_rate)
            orig_texts.append(text)
            adv_texts.append(new_text)
            true_labels.append(true_label)
            new_labels.append(new_label)

    # Normalise by the number of samples actually attacked: the corpus may
    # hold fewer than args.data_size entries. max(..., 1) avoids dividing by
    # zero on an empty dataset.
    num_samples = max(len(data), 1)
    message = 'For target model {}: original accuracy: {:.3f}%, adv accuracy: {:.3f}%, ' \
              'avg changed rate: {:.3f}%, num of queries: {:.1f}\n'.format(args.target_model,
                                                                     (1 - orig_failures / num_samples) * 100,
                                                                     (1 - adv_failures / num_samples) * 100,
                                                                     np.mean(changed_rates) * 100,
                                                                     np.mean(nums_queries))
    print(message)
    # Append mode keeps summaries from earlier runs; the context manager
    # guarantees the handle is flushed and closed.
    with open(os.path.join(args.output_dir, 'results_log'), 'a') as log_file:
        log_file.write(message)

    adv_data = {
        'adv_text': adv_texts,
        'orig_text': orig_texts,
        'true_labels': true_labels,
        'new_labels': new_labels
    }
    import joblib
    joblib.dump(adv_data, os.path.join(args.output_dir, args.save))
示例#4
0
def main():
    """Command-line entry point for the tf-idf guided attack.

    Parses the CLI options, loads up to ``--data_size`` samples from
    ``--dataset_path``, builds the target classifier, the counter-fitted
    vocabulary/embeddings and the similarity data, then attacks every sample
    with ``attack`` (or ``random_attack`` when ``--perturb_ratio`` is
    positive).

    Side effects:
        * prints progress and summary statistics;
        * appends a one-line summary to
          ``results_context/<target_model>/<target_dataset>/log.txt``;
        * overwrites ``results.csv`` in that same directory with one row per
          successful attack;
        * overwrites ``<output_dir>/adversaries.txt``.

    Raises:
        ValueError: if ``--perturb_ratio`` is positive while a pre-computed
            similarity file is supplied, because no cosine-similarity matrix
            is available for ``random_attack`` in that configuration.
    """
    parser = argparse.ArgumentParser()

    ## Required parameters
    parser.add_argument("--dataset_path",
                        type=str,
                        required=True,
                        help="Which dataset to attack.")
    parser.add_argument("--nclasses",
                        type=int,
                        default=2,
                        help="How many classes for classification.")
    parser.add_argument(
        "--target_model",
        type=str,
        required=True,
        choices=['wordLSTM', 'bert', 'wordCNN'],
        help=
        "Target models for text classification: fasttext, charcnn, word level lstm "
        "For NLI: InferSent, ESIM, bert-base-uncased")
    parser.add_argument("--target_model_path",
                        type=str,
                        required=True,
                        help="pre-trained target model path")
    parser.add_argument(
        "--word_embeddings_path",
        type=str,
        default='',
        help="path to the word embeddings for the target model")
    parser.add_argument(
        "--counter_fitting_embeddings_path",
        type=str,
        required=True,
        help="path to the counter-fitting embeddings we used to find synonyms")
    parser.add_argument(
        "--counter_fitting_cos_sim_path",
        type=str,
        default='',
        help=
        "pre-compute the cosine similarity scores based on the counter-fitting embeddings"
    )
    parser.add_argument("--USE_cache_path",
                        type=str,
                        required=True,
                        help="Path to the USE encoder cache.")
    parser.add_argument(
        "--output_dir",
        type=str,
        default='adv_results',
        help="The output directory where the attack results will be written.")

    ## Model hyperparameters
    parser.add_argument(
        "--sim_score_window",
        default=15,
        type=int,
        help=
        "Text length or token number to compute the semantic similarity score")
    parser.add_argument("--import_score_threshold",
                        default=-1.,
                        type=float,
                        help="Required mininum importance score.")
    parser.add_argument("--sim_score_threshold",
                        default=0.7,
                        type=float,
                        help="Required minimum semantic similarity score.")
    parser.add_argument("--synonym_num",
                        default=5000,
                        type=int,
                        help="Number of synonyms to extract")
    parser.add_argument("--batch_size",
                        default=32,
                        type=int,
                        help="Batch size to get prediction")
    parser.add_argument("--data_size",
                        default=1000,
                        type=int,
                        help="Data size to create adversaries")
    parser.add_argument(
        "--perturb_ratio",
        default=0.,
        type=float,
        help="Whether use random perturbation for ablation study")
    parser.add_argument("--max_seq_length",
                        default=128,
                        type=int,
                        help="max sequence length for BERT target model")
    parser.add_argument("--target_dataset",
                        default="imdb",
                        type=str,
                        help="Dataset Name")
    parser.add_argument("--fuzz",
                        default=0,
                        type=int,
                        help="Word Pruning Value")
    parser.add_argument("--top_k_words",
                        default=1000000,
                        type=int,
                        help="Top K Words")
    parser.add_argument("--allowed_qrs",
                        default=1000000,
                        type=int,
                        help="Allowerd qrs")

    args = parser.parse_args()

    # A non-empty output directory is only reported, not treated as an error.
    if os.path.exists(args.output_dir) and os.listdir(args.output_dir):
        print("Output directory ({}) already exists and is not empty.".format(
            args.output_dir))
    else:
        os.makedirs(args.output_dir, exist_ok=True)

    # get data to attack
    texts, labels = dataloader.read_corpus(args.dataset_path, csvf=False)
    data = list(zip(texts, labels))
    data = data[:args.data_size]  # choose how many samples for adversary
    print("Data import finished!")

    # construct the model
    print("Building Model...")
    if args.target_model == 'wordLSTM':
        model = Model(args.word_embeddings_path, nclasses=args.nclasses).cuda()
        checkpoint = torch.load(args.target_model_path, map_location='cuda:0')
        model.load_state_dict(checkpoint)
    elif args.target_model == 'wordCNN':
        model = Model(args.word_embeddings_path,
                      nclasses=args.nclasses,
                      hidden_size=150,
                      cnn=True).cuda()
        checkpoint = torch.load(args.target_model_path, map_location='cuda:0')
        model.load_state_dict(checkpoint)
    elif args.target_model == 'bert':
        model = NLI_infer_BERT(args.target_model_path,
                               nclasses=args.nclasses,
                               max_seq_length=args.max_seq_length)
    predictor = model.text_pred
    print("Model built!")

    # prepare synonym extractor
    # build dictionary via the embedding file
    idx2word = {}
    word2idx = {}
    sim_lis = []
    # Bound up front so the random-attack configuration check below can tell
    # whether a matrix was actually computed.
    cos_sim = None
    word_embedding = defaultdict(list)

    print("Building vocab...")
    with open(args.counter_fitting_embeddings_path, 'r') as ifile:
        for line in ifile:
            word = line.split()[0]
            embedding = [float(num) for num in line.strip().split()[1:]]
            # idx2word is keyed by integer index, so this membership test on a
            # string token is always true and every line gets its own index.
            # NOTE(review): possibly meant `word2idx`; changing it would shift
            # indices whenever the file repeats a token -- confirm against how
            # the similarity data is indexed before touching it.
            if word not in idx2word:
                idx2word[len(idx2word)] = word
                word2idx[word] = len(idx2word) - 1
                word_embedding[word] = embedding

    print("Building cos sim matrix...")
    if args.counter_fitting_cos_sim_path:
        print('Load pre-computed cosine similarity matrix from {}'.format(
            args.counter_fitting_cos_sim_path))
        # SECURITY: pickle.load executes arbitrary code embedded in the file;
        # only point this option at similarity files you produced yourself.
        with open(args.counter_fitting_cos_sim_path, "rb") as fp:
            sim_lis = pickle.load(fp)
    else:
        # NOTE(review): in this branch `sim_lis` stays empty while `attack`
        # below still receives it -- confirm `attack` copes with that.
        print('Start computing the cosine similarity matrix!')
        embeddings = []
        with open(args.counter_fitting_embeddings_path, 'r') as ifile:
            for line in ifile:
                embedding = [float(num) for num in line.strip().split()[1:]]
                embeddings.append(embedding)
        embeddings = np.array(embeddings)
        print(embeddings.T.shape)
        # Rows are normalised first so a plain dot product yields cosines.
        norm = np.linalg.norm(embeddings, axis=1, keepdims=True)
        embeddings = np.asarray(embeddings / norm, "float64")
        cos_sim = np.dot(embeddings, embeddings.T)

    print("Cos sim import finished!")

    # random_attack takes the dense matrix, which only exists when it was
    # computed above; fail early with a clear message instead of hitting an
    # unbound local on the first sample.
    if args.perturb_ratio > 0. and cos_sim is None:
        raise ValueError(
            "--perturb_ratio > 0 needs the cosine similarity matrix computed "
            "from the embeddings; do not pass --counter_fitting_cos_sim_path "
            "together with it.")

    # build the semantic similarity module
    use = USE(args.USE_cache_path)

    # start attacking
    orig_failures = 0.
    adv_failures = 0.
    avg = 0.  # never updated below; kept because it is printed at the end
    changed_rates = []
    nums_queries = []
    orig_texts = []
    adv_texts = []
    true_labels = []
    new_labels = []
    wrds = []
    s_queries = []
    f_queries = []
    success = []
    results = []
    fails = []
    scrs = []

    results_dir = "results_context/" + args.target_model + "/" + args.target_dataset
    log_file = results_dir + "/log.txt"
    result_file = results_dir + "/results.csv"
    wts_file = "tfidf_weights/" + "tfidf-" + args.target_dataset + ".csv"

    stop_words_set = criteria.get_stopwords()
    print('Start attacking!')

    for idx, (text, true_label) in enumerate(data):
        if idx % 20 == 0:
            print(str(idx) + " Samples Done")
            print(len(success))
            print(np.mean(changed_rates))
        if args.perturb_ratio > 0.:
            new_text, num_changed, orig_label, \
            new_label, num_queries = random_attack(text, true_label, predictor,
                                                    args.perturb_ratio, stop_words_set,
                                                    word2idx, idx2word, cos_sim, sim_predictor=use,
                                                    sim_score_threshold=args.sim_score_threshold,
                                                    import_score_threshold=args.import_score_threshold,
                                                    sim_score_window=args.sim_score_window,
                                                    synonym_num=args.synonym_num,
                                                    batch_size=args.batch_size)
            # random_attack yields only five values; give the two extra
            # bookkeeping names used below a defined placeholder.
            pwrds, perts = None, None
        else:
            new_text, num_changed, orig_label, \
            new_label, num_queries, pwrds, perts = attack(args.fuzz, args.top_k_words, args.allowed_qrs,
                                            wts_file, idx, text, true_label, predictor, stop_words_set,
                                            word2idx, idx2word, sim_lis, word_embedding, sim_predictor=use,
                                            sim_score_threshold=args.sim_score_threshold,
                                            import_score_threshold=args.import_score_threshold,
                                            sim_score_window=args.sim_score_window,
                                            synonym_num=args.synonym_num,
                                            batch_size=args.batch_size)
        scrs.append(perts)

        # Query counts are only collected for samples the model originally
        # classified correctly.
        if true_label != orig_label:
            orig_failures += 1
        else:
            nums_queries.append(num_queries)

        if true_label != new_label:
            adv_failures += 1

        changed_rate = 1.0 * num_changed / len(text)
        joined_text = ' '.join(text)
        if true_label == orig_label and true_label != new_label:
            # Successful attack: an originally correct prediction was flipped.
            s_queries.append(num_queries)
            success.append(idx)
            changed_rates.append(changed_rate)
            orig_texts.append(joined_text)
            adv_texts.append(new_text)
            true_labels.append(true_label)
            new_labels.append(new_label)
            wrds.append(pwrds)
            results.append(
                [idx, joined_text, new_text, num_queries, changed_rate * 100])
            print("Attacked: " + str(idx))
        if true_label == orig_label and true_label == new_label:
            # Failed attack: the prediction stayed correct.
            f_queries.append(num_queries)
            fails.append([idx, joined_text, new_text, num_queries])

    # Normalise by the number of samples actually attacked instead of a fixed
    # 1000; max(..., 1) keeps an empty dataset from dividing by zero.
    num_samples = max(len(data), 1)
    message = 'For target model {} on dataset window size {} with WP val {} top words {} qrs {} : ' \
              'original accuracy: {:.3f}%, adv accuracy: {:.3f}%, ' \
              'avg changed rate: {:.3f}%, num of queries: {:.1f}\n'.format(args.target_model,
                                                                      args.sim_score_window,
                                                                      args.fuzz,
                                                                      args.top_k_words, args.allowed_qrs,
                                                                     (1 - orig_failures / num_samples) * 100,
                                                                     (1 - adv_failures / num_samples) * 100,
                                                                     np.mean(changed_rates) * 100,
                                                                     np.mean(nums_queries))
    print(message)

    # Make sure the results directory exists so a finished run is not lost to
    # a FileNotFoundError when the outputs are written.
    os.makedirs(results_dir, exist_ok=True)
    with open(log_file, 'a') as log:
        log.write(message)
    # newline='' lets the csv module control line endings itself.
    with open(result_file, 'w', newline='') as csvfile:
        csvwriter = csv.writer(csvfile)
        csvwriter.writerows(results)

    print(avg)
    print(len(f_queries))
    print(f_queries)

    with open(os.path.join(args.output_dir, 'adversaries.txt'), 'w') as ofile:
        for orig_text, adv_text, true_label, new_label in zip(
                orig_texts, adv_texts, true_labels, new_labels):
            ofile.write('orig sent ({}):\t{}\nadv sent ({}):\t{}\n\n'.format(
                true_label, orig_text, new_label, adv_text))
def main(args):
    """Train a text classifier on one corpus and report its best test score.

    The train/test splits are read with ``dataloader.read_corpus`` from a
    hard-coded corpus directory, chosen by ``args.dataset``:

    * ``'mr'``: ``train.txt`` / ``test.txt``, reader defaults apart from
      ``max_length``.
    * ``'imdb'``: ``train_tok.csv`` / ``test_tok.csv`` with ``clean=False``,
      ``MR=True`` and ``shuffle=False``.
    * anything else: the same CSV settings as ``'imdb'`` plus an explicit
      ``fix_labels`` flag, which is ``True`` only for ``'yelp'``, ``'fake'``
      and ``'ag'``.

    A ``Model`` is then built and moved to the GPU, and ``train_model`` is
    called once per epoch with an Adam optimizer.

    Args:
        args: Parsed command-line namespace. The attributes read here are
            ``dataset``, ``max_length``, ``save_path``, ``embedding``, ``d``,
            ``depth``, ``dropout``, ``cnn``, ``lr``, ``batch_size``,
            ``max_epoch`` and ``lr_decay``.

    Side effects:
        Appends the command line, the longest batched sequence length, a
        progress line every 20 epochs and the final score to
        ``<args.save_path>.log``, and writes the final score to stdout.
        ``args.save_path`` is also forwarded to ``train_model``
        (presumably the checkpoint location -- confirm in ``train_model``).
    """
    corpus_root = ('/home/mahmoudm/pb90_scratch/mahmoud/TextFooler-master/'
                   'data/adversary_training_corpora')
    max_length = args.max_length
    dataset = args.dataset

    # Only the file names and the reader options differ between corpora.
    if dataset == 'mr':
        train_name, test_name = 'train.txt', 'test.txt'
        read_kwargs = {'max_length': max_length}
    else:
        train_name, test_name = 'train_tok.csv', 'test_tok.csv'
        read_kwargs = {
            'clean': False,
            'MR': True,
            'shuffle': False,
            'max_length': max_length,
        }
        # 'imdb' never passed fix_labels, so keep relying on the reader's
        # default there instead of assuming what that default is.
        if dataset != 'imdb':
            read_kwargs['fix_labels'] = dataset in ('yelp', 'fake', 'ag')

    train_x, train_y = dataloader.read_corpus(
        f'{corpus_root}/{dataset}/{train_name}', **read_kwargs)
    test_x, test_y = dataloader.read_corpus(
        f'{corpus_root}/{dataset}/{test_name}', **read_kwargs)

    # Labels are treated as 0-based integer class ids.
    nclasses = max(train_y) + 1

    log_path = os.path.join(os.path.dirname(args.save_path),
                            f'{os.path.basename(args.save_path)}.log')
    # The context manager closes the log even if model construction or
    # training raises part-way through.
    with open(log_path, 'a') as log_file:
        model = Model(args.embedding, args.d, args.depth, args.dropout,
                      args.cnn, nclasses).cuda()
        # Hand only trainable parameters to the optimizer.
        trainable_params = (p for p in model.parameters() if p.requires_grad)
        optimizer = optim.Adam(trainable_params, lr=args.lr)

        train_x, train_y = dataloader.create_batches(train_x,
                                                     train_y,
                                                     args.batch_size,
                                                     model.word2id,
                                                     max_len=max_length)
        test_x, test_y = dataloader.create_batches(test_x,
                                                   test_y,
                                                   args.batch_size,
                                                   model.word2id,
                                                   max_len=max_length)

        # NOTE(review): assumes each batch is a 2-D tensor whose transpose
        # yields one row per sequence -- confirm against create_batches.
        lengths = np.array([
            len(seq) for batch in train_x for seq in batch.t().contiguous()
        ])
        log_file.write("Run with command:\n" + " ".join(sys.argv[1:]) + "\n")
        log_file.write("\n")
        log_file.write(f"Max seq length found = {np.max(lengths)}\n")
        log_file.flush()

        best_test = 0
        with tqdm(total=args.max_epoch) as progress:
            for epoch in range(args.max_epoch):
                best_test = train_model(epoch, model, optimizer, train_x,
                                        train_y, test_x, test_y, best_test,
                                        args.save_path, log_file)
                if args.lr_decay > 0:
                    optimizer.param_groups[0]['lr'] *= args.lr_decay

                # Advance by one finished epoch so the bar never overshoots
                # its total; the log still gets a snapshot every 20 epochs.
                progress.update(1)
                if epoch % 20 == 0:
                    log_file.write(str(progress) + '\n')
                    log_file.flush()

        sys.stdout.write("test_acc: {:.6f}\n".format(best_test))
        log_file.write("test_acc: {:.6f}\n".format(best_test))
示例#6
0
def _build_attack_arg_parser():
    """Return the argument parser for the adversarial-attack command line."""
    parser = argparse.ArgumentParser()

    ## Required parameters
    parser.add_argument("--dataset_path",
                        type=str,
                        required=True,
                        help="Which dataset to attack.")
    parser.add_argument("--nclasses",
                        type=int,
                        default=2,
                        help="How many classes for classification.")
    parser.add_argument(
        "--target_model",
        type=str,
        required=True,
        choices=['wordLSTM', 'bert', 'wordCNN'],
        help=
        "Target models for text classification: fasttext, charcnn, word level lstm "
        "For NLI: InferSent, ESIM, bert-base-uncased")
    parser.add_argument("--target_model_path",
                        type=str,
                        required=True,
                        help="pre-trained target model path")
    parser.add_argument(
        "--word_embeddings_path",
        type=str,
        default='',
        help="path to the word embeddings for the target model")
    parser.add_argument(
        "--counter_fitting_embeddings_path",
        type=str,
        required=True,
        help="path to the counter-fitting embeddings we used to find synonyms")
    parser.add_argument(
        "--counter_fitting_cos_sim_path",
        type=str,
        default='',
        help=
        "pre-compute the cosine similarity scores based on the counter-fitting embeddings"
    )
    parser.add_argument("--USE_cache_path",
                        type=str,
                        required=True,
                        help="Path to the USE encoder cache.")
    parser.add_argument(
        "--output_dir",
        type=str,
        default='adv_results',
        help="The output directory where the attack results will be written.")

    ## Model hyperparameters
    parser.add_argument(
        "--sim_score_window",
        default=15,
        type=int,
        help=
        "Text length or token number to compute the semantic similarity score")
    parser.add_argument("--import_score_threshold",
                        default=-1.,
                        type=float,
                        help="Required mininum importance score.")
    parser.add_argument("--sim_score_threshold",
                        default=0.7,
                        type=float,
                        help="Required minimum semantic similarity score.")
    parser.add_argument("--synonym_num",
                        default=50,
                        type=int,
                        help="Number of synonyms to extract")
    parser.add_argument("--batch_size",
                        default=32,
                        type=int,
                        help="Batch size to get prediction")
    parser.add_argument("--data_size",
                        default=1000,
                        type=int,
                        help="Data size to create adversaries")
    parser.add_argument(
        "--perturb_ratio",
        default=0.,
        type=float,
        help="Whether use random perturbation for ablation study")
    parser.add_argument("--max_seq_length",
                        default=128,
                        type=int,
                        help="max sequence length for BERT target model")
    return parser


def _load_embedding_vocab(embeddings_path):
    """Map each line of an embeddings file to its first token, both ways.

    Returns ``(idx2word, word2idx)``. Every line gets the index of its
    position in the file, including lines that repeat an earlier word; for
    a repeated word ``word2idx`` keeps the last position seen.
    """
    idx2word = {}
    word2idx = {}
    with open(embeddings_path, 'r') as ifile:
        for idx, line in enumerate(ifile):
            word = line.split()[0]
            # The previous ``word not in idx2word`` guard compared a string
            # against integer keys, so it never skipped anything. Numbering
            # lines directly keeps that behaviour.
            # NOTE(review): presumably these indices must line up with the
            # rows of the precomputed cosine-similarity matrix -- confirm
            # before introducing any de-duplication here.
            idx2word[idx] = word
            word2idx[word] = idx
    return idx2word, word2idx


def main():
    """Run the synonym-substitution attack over a dataset and print a summary.

    Parses the command line, loads up to ``--data_size`` samples with
    ``dataloader.read_corpus``, builds a ``BERTInference`` predictor, loads
    the counter-fitted vocabulary and its precomputed cosine-similarity
    matrix, then calls ``attack`` on every sample. Each original/adversarial
    pair is printed, followed by one summary line with the accumulated time,
    the average change rate and the success rate.

    An attack counts as successful when the model's original prediction
    matched the true label and the new prediction does not. Only successful
    attacks contribute to the reported time and change rate.

    ``--target_model``, ``--word_embeddings_path``, ``--USE_cache_path``,
    ``--output_dir`` and ``--perturb_ratio`` are accepted but not read by
    this function; the target model is always ``BERTInference``.

    Raises:
        SystemExit: via ``argparse`` when arguments are invalid, including
            when ``--counter_fitting_cos_sim_path`` is left empty.
    """
    parser = _build_attack_arg_parser()
    args = parser.parse_args()

    # The similarity matrix is loaded unconditionally below, so fail fast
    # with a usage error rather than after the model has been built.
    if not args.counter_fitting_cos_sim_path:
        parser.error("--counter_fitting_cos_sim_path is required: the "
                     "precomputed cosine similarity matrix is always loaded")

    # get data to attack
    texts, labels = dataloader.read_corpus(args.dataset_path)
    data = list(zip(texts, labels))
    data = data[:args.data_size]  # choose how many samples for adversary
    print("Data import finished!")

    # construct the model
    print("Building Model...")
    model = BERTInference(args.target_model_path,
                          nclasses=args.nclasses,
                          max_seq_length=args.max_seq_length)
    predictor = model.text_pred
    print("Model built!")

    # prepare synonym extractor
    # build dictionary via the embedding file
    print("Building vocab...")
    idx2word, word2idx = _load_embedding_vocab(
        args.counter_fitting_embeddings_path)

    print("Building cos sim matrix...")
    cos_sim = np.load(args.counter_fitting_cos_sim_path)
    print("Cos sim import finished!")

    # build the semantic similarity module
    # NOTE: --USE_cache_path is parsed but not forwarded; the encoder is
    # constructed with its defaults.
    use = UniversalSentenceEncoder()

    stop_words_set = criteria.get_stopwords()
    print('Start attacking!')
    changed_rates = []
    total_time, success, total = 0, 0, 0
    for text, true_label in data:
        tick = time.time()
        new_text, num_changed, orig_label, \
        new_label, num_queries = attack(text, true_label, predictor, stop_words_set,
                                        word2idx, idx2word, cos_sim, sim_predictor=use,
                                        sim_score_threshold=args.sim_score_threshold,
                                        import_score_threshold=args.import_score_threshold,
                                        sim_score_window=args.sim_score_window,
                                        synonym_num=args.synonym_num,
                                        batch_size=args.batch_size)

        old_text = ' '.join(text)
        print(f"Original: {old_text}")
        print()
        print(f"New:      {new_text}")
        print("--------------------------------------------------------------")

        if true_label == orig_label and true_label != new_label:
            # Guard against an empty token list so one degenerate sample
            # cannot abort the whole run with a ZeroDivisionError.
            text_len = len(text)
            changed_rates.append(num_changed / text_len if text_len else 0.0)
            tock = time.time()
            total_time += tock - tick
            success += 1
        total += 1

    # With no successful attack there is nothing to average: report nan
    # (what np.mean of an empty list yields) without the RuntimeWarning.
    # With no samples at all, report a 0% success rate instead of dividing
    # by zero.
    if changed_rates:
        avg_change_rate = np.mean(changed_rates) * 100
    else:
        avg_change_rate = float('nan')
    success_rate = (success / total) * 100 if total else 0.0
    print(
        f"Time: {total_time}\tAvg. Change Rate: {avg_change_rate}\tSuccess Rate: {success_rate}"
    )