示例#1
0
文件: fool.py 项目: dongxinshuai/ASCC
def fool_text_classifier():
    """Attack a trained Keras text classifier with adversarial paraphrases.

    Loads the dataset and model configured in the module-level ``args``/
    ``opt``, reports clean-sample accuracy, then crafts an adversarial
    paraphrase for every correctly classified test sample (up to
    ``args.clean_samples_cap``).  Adversarial texts and per-sample change
    tuples are appended to text files under ``./fool_result``.
    """
    clean_samples_cap = args.clean_samples_cap  # e.g. 1000
    print('clean_samples_cap:', clean_samples_cap)

    # get tokenizer
    dataset = args.dataset
    tokenizer = get_tokenizer(opt)

    # Read data set.  Only the split function differs per dataset; the
    # word/char processing step is identical, so it is factored out below.
    x_test = y_test = None
    test_texts = None
    if dataset == 'imdb':
        train_texts, train_labels, dev_texts, dev_labels, test_texts, test_labels = split_imdb_files(
            opt)
    elif dataset == 'agnews':
        train_texts, train_labels, test_texts, test_labels = split_agnews_files(
        )
    elif dataset == 'yahoo':
        train_texts, train_labels, test_texts, test_labels = split_yahoo_files(
        )
    if args.level == 'word':
        x_train, y_train, x_test, y_test = word_process(
            train_texts, train_labels, test_texts, test_labels, dataset)
    elif args.level == 'char':
        x_train, y_train, x_test, y_test = char_process(
            train_texts, train_labels, test_texts, test_labels, dataset)

    # Write clean examples into a txt file (only once per cap/dataset).
    clean_texts_path = r'./fool_result/{}/clean_{}.txt'.format(
        dataset, str(clean_samples_cap))
    # Create the output directory up front; open(..., "a") below would
    # otherwise fail on a fresh checkout.
    os.makedirs(os.path.dirname(clean_texts_path), exist_ok=True)
    if not os.path.isfile(clean_texts_path):
        write_origin_input_texts(clean_texts_path, test_texts)

    # Select the model and load the trained weights.
    assert args.model[:4] == args.level
    model = None
    if args.model == "word_cnn":
        model = word_cnn(dataset)
    elif args.model == "word_bdlstm":
        model = bd_lstm(dataset)
    elif args.model == "char_cnn":
        model = char_cnn(dataset)
    elif args.model == "word_lstm":
        model = lstm(dataset)
    model_path = r'./runs/{}/{}.dat'.format(dataset, args.model)
    model.load_weights(model_path)
    print('model path:', model_path)

    # evaluate classification accuracy of model on clean samples
    scores_origin = model.evaluate(x_test[:clean_samples_cap],
                                   y_test[:clean_samples_cap])
    print('clean samples origin test_loss: %f, accuracy: %f' %
          (scores_origin[0], scores_origin[1]))
    all_scores_origin = model.evaluate(x_test, y_test)
    print('all origin test_loss: %f, accuracy: %f' %
          (all_scores_origin[0], all_scores_origin[1]))

    grad_guide = ForwardGradWrapper(model)
    classes_prediction = grad_guide.predict_classes(x_test[:clean_samples_cap])

    print('Crafting adversarial examples...')
    successful_perturbations = 0
    failed_perturbations = 0
    sub_rate_list = []
    NE_rate_list = []

    # time.clock() was removed in Python 3.8; perf_counter() is the
    # recommended replacement for measuring elapsed time.
    start_cpu = time.perf_counter()
    adv_text_path = r'./fool_result/{}/{}/adv_{}.txt'.format(
        dataset, args.model, str(clean_samples_cap))
    change_tuple_path = r'./fool_result/{}/{}/change_tuple_{}.txt'.format(
        dataset, args.model, str(clean_samples_cap))
    os.makedirs(os.path.dirname(adv_text_path), exist_ok=True)
    # Context managers guarantee the files are closed even if
    # adversarial_paraphrase raises part-way through the loop.
    with open(adv_text_path, "a") as file_1, \
            open(change_tuple_path, "a") as file_2:
        for index, text in enumerate(test_texts[:clean_samples_cap]):
            sub_rate = 0
            NE_rate = 0
            if np.argmax(y_test[index]) == classes_prediction[index]:
                # Only attack samples the model already classifies correctly.
                adv_doc, adv_y, sub_rate, NE_rate, change_tuple_list = adversarial_paraphrase(
                    input_text=text,
                    true_y=np.argmax(y_test[index]),
                    grad_guide=grad_guide,
                    tokenizer=tokenizer,
                    dataset=dataset,
                    level=args.level)
                if adv_y != np.argmax(y_test[index]):
                    successful_perturbations += 1
                    print('{}. Successful example crafted.'.format(index))
                else:
                    failed_perturbations += 1
                    print('{}. Failure.'.format(index))

                text = adv_doc
                sub_rate_list.append(sub_rate)
                NE_rate_list.append(NE_rate)
                file_2.write(str(index) + str(change_tuple_list) + '\n')
            file_1.write(text + " sub_rate: " + str(sub_rate) + "; NE_rate: " +
                         str(NE_rate) + "\n")
    end_cpu = time.perf_counter()
    print('CPU second:', end_cpu - start_cpu)
    # Guard against ZeroDivisionError when no sample was attacked.
    if sub_rate_list:
        print('mean substitution rate:', sum(sub_rate_list) / len(sub_rate_list))
    if NE_rate_list:
        print('mean NE rate:', sum(NE_rate_list) / len(NE_rate_list))
示例#2
0
文件: fool.py 项目: dongxinshuai/ASCC
def fool_text_classifier_pytorch(model, dataset='imdb'):
    """Attack a trained PyTorch text classifier with adversarial paraphrases.

    Parameters
    ----------
    model : object
        Trained model wrapped by ``ForwardGradWrapper_pytorch``.
    dataset : str
        One of ``'imdb'``, ``'agnews'``, ``'yahoo'``.

    Crafts a word-level adversarial paraphrase for each of the first
    ``clean_samples_cap`` correctly classified test samples and appends
    results to text files under ``./fool_result/<dataset>``.
    """
    clean_samples_cap = 100
    print('clean_samples_cap:', clean_samples_cap)

    # get tokenizer
    tokenizer = get_tokenizer(opt)

    # Read data set.  Only the split function differs per dataset; the
    # word-level processing step is identical, so it is factored out below.
    x_test = y_test = None
    test_texts = None
    if dataset == 'imdb':
        train_texts, train_labels, dev_texts, dev_labels, test_texts, test_labels = split_imdb_files(
            opt)
    elif dataset == 'agnews':
        train_texts, train_labels, test_texts, test_labels = split_agnews_files(
        )
    elif dataset == 'yahoo':
        train_texts, train_labels, test_texts, test_labels = split_yahoo_files(
        )
    x_train, y_train, x_test, y_test = word_process(
        train_texts, train_labels, test_texts, test_labels, dataset)

    grad_guide = ForwardGradWrapper_pytorch(model)
    classes_prediction = grad_guide.predict_classes(x_test[:clean_samples_cap])

    print('Crafting adversarial examples...')
    successful_perturbations = 0
    failed_perturbations = 0
    sub_rate_list = []
    NE_rate_list = []

    # time.clock() was removed in Python 3.8; perf_counter() is the
    # recommended replacement for measuring elapsed time.
    start_cpu = time.perf_counter()
    adv_text_path = r'./fool_result/{}/adv_{}.txt'.format(
        dataset, str(clean_samples_cap))
    change_tuple_path = r'./fool_result/{}/change_tuple_{}.txt'.format(
        dataset, str(clean_samples_cap))
    # Create the output directory up front; open(..., "a") would otherwise
    # fail on a fresh checkout.
    os.makedirs(os.path.dirname(adv_text_path), exist_ok=True)
    # Context managers guarantee the files are closed even if
    # adversarial_paraphrase raises part-way through the loop.
    with open(adv_text_path, "a") as file_1, \
            open(change_tuple_path, "a") as file_2:
        for index, text in enumerate(test_texts[:clean_samples_cap]):
            sub_rate = 0
            NE_rate = 0
            if np.argmax(y_test[index]) == classes_prediction[index]:
                # Only attack samples the model already classifies correctly.
                adv_doc, adv_y, sub_rate, NE_rate, change_tuple_list = adversarial_paraphrase(
                    input_text=text,
                    true_y=np.argmax(y_test[index]),
                    grad_guide=grad_guide,
                    tokenizer=tokenizer,
                    dataset=dataset,
                    level='word')
                if adv_y != np.argmax(y_test[index]):
                    successful_perturbations += 1
                    print('{}. Successful example crafted.'.format(index))
                else:
                    failed_perturbations += 1
                    print('{}. Failure.'.format(index))

                text = adv_doc
                sub_rate_list.append(sub_rate)
                NE_rate_list.append(NE_rate)
                file_2.write(str(index) + str(change_tuple_list) + '\n')
            file_1.write(text + " sub_rate: " + str(sub_rate) + "; NE_rate: " +
                         str(NE_rate) + "\n")
    end_cpu = time.perf_counter()
    print('CPU second:', end_cpu - start_cpu)
    # Guard against ZeroDivisionError when no sample was attacked.
    if sub_rate_list:
        print('mean substitution rate:', sum(sub_rate_list) / len(sub_rate_list))
    if NE_rate_list:
        print('mean NE rate:', sum(NE_rate_list) / len(NE_rate_list))
示例#3
0
    # Choose some female tweets
    tweets, = np.where(y_test[:test_samples_cap] == 0)

    print('Crafting adversarial examples...')
    successful_perturbations = 0
    failed_perturbations = 0
    adversarial_text_data = []
    adversarial_preds = np.array(preds)

    for index, doc in enumerate(docs_test[:test_samples_cap]):
        correct_classifed = False
        if y_test[index] == preds[index]:
            adv_doc, (y, adv_y) = adversarial_paraphrase(
                doc,
                grad_guide,
                target=1 - y_test[index],
                use_typos=use_typos,
                use_synonyms=use_synonyms,
                mode=mode)
            correct_classifed = True

        if correct_classifed:
            pred = np.round(adv_y)
            if pred != preds[index]:
                successful_perturbations += 1
                print('{}. Successful example crafted.'.format(index))
            else:
                failed_perturbations += 1
                print('{}. Failure.'.format(index))

            adversarial_preds[index] = pred
示例#4
0
    print('Model accuracy on test:', accuracy)

    # Choose some female tweets
    female_indices, = np.where(y_test[:test_samples_cap] == 0)

    print('Crafting adversarial examples...')
    successful_perturbations = 0
    failed_perturbations = 0
    adversarial_text_data = []
    adversarial_preds = np.array(preds)

    for index, doc in enumerate(docs_test[:test_samples_cap]):
        if y_test[index] == 0 and preds[index] == 0:
            # If model prediction is correct, and the true class is female,
            # craft adversarial text
            adv_doc, (y, adv_y) = adversarial_paraphrase(
                    doc, grad_guide, target=1)

            pred = np.round(adv_y)
            if pred != preds[index]:
                successful_perturbations += 1
                print('{}. Successful example crafted.'.format(index))
            else:
                failed_perturbations += 1
                print('{}. Failure.'.format(index))

            adversarial_preds[index] = pred
            adversarial_text_data.append({
                'index': index,
                'doc': clean(doc),
                'adv': clean(adv_doc),
                'success': pred != preds[index],
示例#5
0
def fool_text_classifier():
    """Attack a trained text classifier with adversarial paraphrases.

    Loads the dataset/model configured in the module-level ``args``,
    reports clean-sample accuracy, then crafts an adversarial paraphrase
    for every correctly classified test sample (up to
    ``args.clean_samples_cap``).  Adversarial texts and per-sample change
    tuples are appended to files under ``./fool_result``.
    """
    dataset = args.dataset
    print('dataset: {}; model: {}; level: {}.'.format(dataset, args.model, args.level))

    clean_samples_cap = args.clean_samples_cap  # e.g. 1000
    print('clean_samples_cap:', clean_samples_cap)

    # get tokenizer
    tokenizer = get_tokenizer(dataset)

    # Load and process data set
    data_helper = DataHelper(dataset, args.level)
    train_texts, train_labels, test_texts, test_labels = data_helper.load_data()
    x_train, y_train, x_test, y_test = data_helper.processing(need_shuffle=False)

    # Write clean examples into a txt file (only once per cap/dataset).
    clean_texts_path = r'./fool_result/{}/clean_{}.txt'.format(dataset, str(clean_samples_cap))
    if not os.path.isfile(clean_texts_path):
        write_origin_input_texts(clean_texts_path, test_texts)

    # Select the model and load the trained weights
    assert args.model[:4] == args.level
    model = None
    if args.model == "word_cnn":
        model = word_cnn(dataset)
    elif args.model == "word_bdlstm":
        model = bd_lstm(dataset)
    elif args.model == "char_cnn":
        model = char_cnn(dataset)
    elif args.model == "word_lstm":
        model = lstm(dataset)
    model_filename = r'./runs/{}/{}.dat'.format(dataset, args.model)
    model.load_weights(model_filename)
    print('model path:', model_filename)

    # evaluate classification accuracy of model on clean samples
    scores_origin = model.evaluate(x_test[:clean_samples_cap], y_test[:clean_samples_cap])
    print('clean samples origin test_loss: %f, accuracy: %f' % (scores_origin[0], scores_origin[1]))
    all_scores_origin = model.evaluate(x_test, y_test)
    print('all origin test_loss: %f, accuracy: %f' % (all_scores_origin[0], all_scores_origin[1]))

    grad_guide = ForwardGradWrapper(model)
    classes_prediction = grad_guide.predict_classes(x_test[: clean_samples_cap])

    print('Crafting adversarial examples...')
    successful_perturbations = 0
    failed_perturbations = 0
    sub_rate_list = []
    NE_rate_list = []

    adv_text_filename = r'./fool_result/{}/{}/adv_{}.txt'.format(dataset, args.model, str(clean_samples_cap))
    change_tuple_filename = r'./fool_result/{}/{}/change_tuple_{}.txt'.format(dataset, args.model, str(clean_samples_cap))
    fool_result_path = os.path.split(adv_text_filename)[0]
    if not os.path.exists(fool_result_path):
        os.makedirs(fool_result_path)

    # time.clock() was removed in Python 3.8; perf_counter() is the
    # recommended replacement for measuring elapsed time.
    start_cpu = time.perf_counter()
    with open(adv_text_filename, "a") as f1, open(change_tuple_filename, "a") as f2:
        for index, text in enumerate(test_texts[: clean_samples_cap]):
            sub_rate = 0
            NE_rate = 0
            if np.argmax(y_test[index]) == classes_prediction[index]:
                # Only attack samples the model already classifies correctly.
                adv_doc, adv_y, sub_rate, NE_rate, change_tuple_list = adversarial_paraphrase(
                    input_text=text,
                    true_y=np.argmax(y_test[index]),
                    grad_guide=grad_guide,
                    tokenizer=tokenizer,
                    dataset=dataset,
                    level=args.level)
                if adv_y != np.argmax(y_test[index]):
                    successful_perturbations += 1
                    print('{}. Successful example crafted.'.format(index))
                else:
                    failed_perturbations += 1
                    print('{}. Failure.'.format(index))

                text = adv_doc
                sub_rate_list.append(sub_rate)
                NE_rate_list.append(NE_rate)
                f2.write(str(index) + str(change_tuple_list) + '\n')
            f1.write(text + " sub_rate: " + str(sub_rate) + "; NE_rate: " + str(NE_rate) + "\n")
    # Summary statistics do not need the files open; moved out of the
    # `with` block so the files are closed as early as possible.
    end_cpu = time.perf_counter()
    print('CPU second:', end_cpu - start_cpu)
    # Guard against ZeroDivisionError when no sample was attacked.
    if sub_rate_list:
        print('mean substitution rate:', sum(sub_rate_list) / len(sub_rate_list))
    if NE_rate_list:
        print('mean NE rate:', sum(NE_rate_list) / len(NE_rate_list))
示例#6
0
    # Choose some female tweets
    female_indices, = np.where(y_test[:test_samples_cap] == 0)

    print('Crafting adversarial examples...')
    successful_perturbations = 0
    failed_perturbations = 0
    adversarial_text_data = []
    adversarial_preds = np.array(preds)

    for index, doc in enumerate(docs_test[:test_samples_cap]):
        if y_test[index] == 0 and preds[index] == 0:
            # If model prediction is correct, and the true class is female,
            # craft adversarial text
            adv_doc, (y,
                      adv_y) = adversarial_paraphrase(doc,
                                                      grad_guide,
                                                      target=1,
                                                      use_typos=args.use_typos)

            pred = np.round(adv_y)
            if pred != preds[index]:
                successful_perturbations += 1
                print('{}. Successful example crafted.'.format(index))
            else:
                failed_perturbations += 1
                print('{}. Failure.'.format(index))

            adversarial_preds[index] = pred
            adversarial_text_data.append({
                'index': index,
                'doc': clean(doc),
                'adv': clean(adv_doc),