def train_student(dataset, nb_teachers, shift_dataset, inverse_w=None, weight=True):
  """
  This function trains a student using predictions made by an ensemble of
  teachers. The student and teacher models are trained using the same
  neural network architecture.
  :param dataset: string corresponding to mnist, cifar10, or svhn
  :param nb_teachers: number of teachers (in the ensemble) to learn from
  :param shift_dataset: dict holding the shifted student data, its predicted labels, and its true labels
  :param inverse_w: importance weights, indexed by class label
  :param weight: whether to train with importance weighting
  :return: teacher and student precision after training
  """
  assert input.create_dir_if_needed(FLAGS.train_dir)

  # Student data was prepared from teacher predictions by the caller
  stdnt_data = shift_dataset['data']
  stdnt_labels = shift_dataset['pred']
  print('number for deep is {}'.format(len(stdnt_labels)))

  if FLAGS.deeper:
    ckpt_path = FLAGS.train_dir + '/' + str(dataset) + '_' + str(nb_teachers) + '_student_deeper.ckpt'  # NOLINT(long-line)
  else:
    ckpt_path = FLAGS.train_dir + '/' + str(dataset) + '_' + str(nb_teachers) + '_student.ckpt'  # NOLINT(long-line)

  if FLAGS.cov_shift == True:
    # Under covariate shift the weights are precomputed for the student; they
    # should be curved into some bound in case any weight is too large.
    weights = inverse_w
  else:
    print('len of shift data={}'.format(len(shift_dataset['data'])))
    weights = np.zeros(len(stdnt_data))
    print('len of weight={} len of labels={}'.format(len(weights), len(stdnt_labels)))
    for i, x in enumerate(weights):
      weights[i] = np.float32(inverse_w[stdnt_labels[i]])

  if weight == True:
    assert deep_cnn.train(stdnt_data, stdnt_labels, ckpt_path, weights=weights)
  else:
    deep_cnn.train(stdnt_data, stdnt_labels, ckpt_path)

  # Compute final checkpoint name for student (with max number of steps)
  ckpt_path_final = ckpt_path + '-' + str(FLAGS.max_steps - 1)

  if dataset == 'adult':
    private_data, private_labels = input.ld_adult(test_only=False, train_only=True)
  elif dataset == 'mnist':
    private_data, private_labels = input.ld_mnist(test_only=False, train_only=True)
  elif dataset == 'svhn':
    private_data, private_labels = input.ld_svhn(test_only=False, train_only=True)

  # Compute student label predictions on the remaining chunk of the test set
  teacher_preds = deep_cnn.softmax_preds(private_data, ckpt_path_final)
  student_preds = deep_cnn.softmax_preds(stdnt_data, ckpt_path_final)

  # Compute teacher and student accuracy
  precision_t = metrics.accuracy(teacher_preds, private_labels)
  precision_s = metrics.accuracy(student_preds, stdnt_labels)
  precision_true = metrics.accuracy(student_preds, shift_dataset['label'])
  print('Precision of teacher after training:{} student={} true precision for student {}'.format(
      precision_t, precision_s, precision_true))

  return precision_t, precision_s
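# A minimal sketch of the per-example weight lookup used in train_student
# above (hypothetical values; assumes `inverse_w` is indexable by class
# label, e.g. an array of length nb_labels). The Python loop is equivalent
# to a vectorized gather:
import numpy as np

labels = np.array([0, 1, 1, 2])        # hypothetical student labels
inverse_w = np.array([1.0, 0.5, 2.0])  # hypothetical per-class weights
weights = np.asarray(inverse_w, dtype=np.float32)[labels]
# weights == [1.0, 0.5, 0.5, 2.0], i.e. weights[i] = inverse_w[labels[i]]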
def get_nns_of_x(x, other_data, other_labels, ckpt_path_final):
  '''Get neighbors of x ordered by similarity (from small to large distance).

  Args:
    x: a single datum. shape: (1, rows, cols, chns)
    other_data: a data pool; the distance of each element to x is computed. shape: (-1, rows, cols, chns)
    other_labels: labels of other_data
    ckpt_path_final: where the pre-trained model is saved.
  Returns:
    ordered_nns: neighbors sorted by distance
    ordered_labels: their labels
    nns_idx: indices of the ordered data, useful to recover the unwhitened data later.
  '''
  # deep_cnn.softmax_preds can now be fed a single datum
  x_preds = deep_cnn.softmax_preds(x, ckpt_path_final)
  other_data_preds = deep_cnn.softmax_preds(other_data, ckpt_path_final)
  distances = np.zeros(len(other_data_preds))
  for j in range(len(other_data)):
    tem = x_preds - other_data_preds[j]
    # Which distance to use is an open choice; start with the L2 norm.
    distances[j] = np.linalg.norm(tem)

  # sort (from small to large)
  nns_idx = np.argsort(distances)
  np.savetxt('similarity_order_X_all_tr_X', nns_idx)
  ordered_nns = other_data[nns_idx]
  ordered_labels = other_labels[nns_idx]
  return ordered_nns, ordered_labels, nns_idx
def train_teacher(dataset, nb_teachers, teacher_id):
  """
  This function trains a teacher (teacher id) among an ensemble of nb_teachers
  models for the dataset specified.
  :param dataset: string corresponding to dataset (svhn, cifar10)
  :param nb_teachers: total number of teachers in the ensemble
  :param teacher_id: id of the teacher being trained
  :return: True if everything went well
  """
  # If working directories do not exist, create them
  assert input.create_dir_if_needed(FLAGS.data_dir)
  assert input.create_dir_if_needed(FLAGS.train_dir)

  # Load the dataset
  if dataset == 'svhn':
    train_data, train_labels, test_data, test_labels = input.ld_svhn(extended=True)
  elif dataset == 'cifar10':
    train_data, train_labels, test_data, test_labels = input.ld_cifar10()
  elif dataset == 'mnist':
    train_data, train_labels, test_data, test_labels = input.ld_mnist()
  else:
    print("Check value of dataset flag")
    return False

  # Retrieve subset of data for this teacher
  data, labels = input.partition_dataset(train_data, train_labels, nb_teachers, teacher_id)
  print("Length of training data: " + str(len(labels)))

  # Define teacher checkpoint filename and full path
  if FLAGS.deeper:
    filename = str(nb_teachers) + '_teachers_' + str(teacher_id) + '_deep.ckpt'
  else:
    filename = str(nb_teachers) + '_teachers_' + str(teacher_id) + '.ckpt'
  ckpt_path = FLAGS.train_dir + '/' + str(dataset) + '_' + filename

  # Perform teacher training
  assert deep_cnn.train(data, labels, ckpt_path)

  # Append final step value to checkpoint for evaluation
  ckpt_path_final = ckpt_path + '-' + str(FLAGS.max_steps - 1)

  # Retrieve teacher probability estimates on the test data
  teacher_preds = deep_cnn.softmax_preds(test_data, ckpt_path_final)

  stdnt_data = test_data[:1000]  # test_data[:FLAGS.stdnt_share]
  preds_for_student = deep_cnn.softmax_preds(stdnt_data, ckpt_path_final)
  np.save(FLAGS.train_dir + '/predictions.npy', preds_for_student)

  # Compute teacher accuracy
  precision = metrics.accuracy(teacher_preds, test_labels)
  print('Precision of teacher after training: ' + str(precision))

  return True
def get_nns(x_o, other_data, other_labels, ckpt_final):
  """Get neighbors of x ordered by similarity (from small to large distance).

  Args:
    x_o: a single datum. shape: (1, rows, cols, chns)
    other_data: a data pool; the distance of each element to x is computed. shape: (-1, rows, cols, chns)
    other_labels: labels of other_data
    ckpt_final: where the pre-trained model is saved.
  Returns:
    nns_data: neighbors sorted by distance
    nns_lbs: their labels
    nns_idx: indices of the ordered data, useful to recover the unwhitened data later.
  """
  logging.info('Start finding the neighbors of x and the idx of its sorted neighbors')
  x = copy.deepcopy(x_o)
  if len(x.shape) == 3:
    x = np.expand_dims(x, axis=0)
  # deep_cnn.softmax_preds can now be fed a single datum
  x_preds = deep_cnn.softmax_preds(x, ckpt_final)
  other_data_preds = deep_cnn.softmax_preds(other_data, ckpt_final)
  distances = np.zeros(len(other_data_preds))
  for j in range(len(other_data)):
    tem = x_preds - other_data_preds[j]
    # Which distance to use is an open choice; start with the L2 norm.
    distances[j] = np.linalg.norm(tem)

  # Optional CSV dump of preds/distances/labels for inspection:
  # most_cmp = np.hstack((other_data_preds, distances.reshape((-1, 1)),
  #                       np.argmax(other_data_preds, axis=1).reshape((-1, 1)),
  #                       other_labels.reshape((-1, 1))))
  # with open(FLAGS.distance_file, 'w') as f:
  #   f_csv = csv.writer(f)
  #   f_csv.writerow(['preds', 'distances', 'pred_lbs', 'real_lbs'])
  #   f_csv.writerows(most_cmp)

  # sort wrt distances (from small to large)
  nns_idx = np.argsort(distances)
  # with open(FLAGS.nns_idx_file, 'w') as f:
  #   f_csv = csv.writer(f)
  #   f_csv.writerow(['sorted_idx'])
  #   f_csv.writerow(nns_idx[:1000].reshape(-1, 1))
  nns_data = other_data[nns_idx]
  nns_lbs = other_labels[nns_idx]

  # get the most common labels among the nearest neighbors;
  # Counter.most_common returns (label, count) pairs, e.g. [(0, 6)]
  print('neighbors:')
  ct = Counter(nns_lbs[:1000]).most_common(10)
  print(ct)
  return nns_data, nns_lbs, nns_idx
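# The per-sample distance loops in get_nns / get_nns_of_x can be vectorized;
# a minimal equivalent sketch (assuming `x_preds` has shape (1, nb_classes)
# and `other_data_preds` has shape (n, nb_classes)):
import numpy as np

def l2_distances(x_preds, other_data_preds):
  # broadcast the single prediction against the pool, then take row norms
  return np.linalg.norm(x_preds - other_data_preds, axis=1)

# np.argsort(l2_distances(x_preds, other_data_preds)) yields the same
# neighbor ordering as the loops above, in a single call.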
def find_stable_idx(train_data, train_labels, test_data, test_labels, ckpt, ckpt_final):
  """Find indices of test points that are classified correctly in every retraining round."""
  stb_bin_file = FLAGS.data_dir + '/stable_bin_new.txt'
  stb_idx_file = FLAGS.data_dir + '/stable_idx_new.txt'
  if os.path.exists(stb_idx_file):
    stable_idx = np.loadtxt(stb_idx_file)
    stable_idx = stable_idx.astype(np.int32)
    logging.info(stb_idx_file + " already exists! Index of stable x has been restored from this file.")
  else:
    logging.info(stb_idx_file + " does not exist! Index of stable x will be generated by retraining 10 times...")
    n_rounds = 10
    acc_bin = np.ones((n_rounds, len(test_labels)))
    for i in range(n_rounds):
      logging.info('retraining model {}/{}'.format(i, n_rounds))
      start_train(train_data, train_labels, test_data, test_labels, ckpt, ckpt_final)
      preds_ts = deep_cnn.softmax_preds(test_data, ckpt_final)
      predicted_lbs = np.argmax(preds_ts, axis=1)
      logging.info('predicted labels: {}'.format(predicted_lbs[:100]))
      logging.info('real labels: {}'.format(test_labels[:100]))
      acc_bin[i] = (predicted_lbs == test_labels)
    # a test point is stable only if it was classified correctly in all rounds
    stable_bin = np.min(acc_bin, axis=0)
    np.savetxt(stb_bin_file, stable_bin)
    logging.info('all labels of test x have been saved at {}/stable_idx_new.txt'.format(FLAGS.data_dir))
    stable_idx = np.argwhere(stable_bin > 0)
    stable_idx = np.reshape(stable_idx, (len(stable_idx),))
    np.savetxt(stb_idx_file, stable_idx)
    logging.info('Index of stable test x has been saved at {}'.format(stb_idx_file))
  return stable_idx
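# A self-contained sketch of the stability test used in find_stable_idx: an
# example is "stable" only if it is classified correctly in every retraining
# round, i.e. the column-wise minimum of the correctness matrix is 1.
import numpy as np

def stable_indices(correct_bin):
  """correct_bin: (n_rounds, n_examples) 0/1 matrix of per-round correctness."""
  stable_bin = np.min(correct_bin, axis=0)  # 1 only if correct in all rounds
  return np.flatnonzero(stable_bin)

# Example: 3 rounds, 4 test points; only indices 0 and 3 survive every round.
# stable_indices(np.array([[1, 0, 1, 1],
#                          [1, 1, 0, 1],
#                          [1, 0, 1, 1]]))  -> array([0, 3])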
def main(argv=None):
  # Load the test dataset from MNIST
  test_data, test_labels = input.ld_mnist(test_only=True)  # DATA_DIR?

  # Compute shape of array that will hold probabilities produced by each
  # teacher, for each training point, and each output class
  result_shape = (nb_teachers, len(test_data), nb_classes)

  # Create array that will hold result
  result = np.zeros(result_shape, dtype=np.float32)

  # Get predictions from each teacher
  for teacher_id in xrange(nb_teachers):
    # Compute path of checkpoint file for teacher model with ID teacher_id
    ckpt_path = "../RESULTS/MNIST_250/TRAIN_DIR/mnist_250_teachers_" + str(teacher_id) + ".ckpt-2999"

    # Get predictions on our training data and store in result array
    preds_for_teacher = deep_cnn.softmax_preds(test_data, ckpt_path)

    # This can take a while when there are a lot of teachers so output status
    print("Computed Teacher " + str(teacher_id) + " softmax predictions")

    # Save in a numpy array
    np.save("PREDOS/predictions_teacher_" + str(teacher_id) + ".npy", preds_for_teacher)

  return True
def ensemble_preds(dataset, nb_teachers, stdnt_data):
  """
  Given a dataset, a number of teachers, and some input data, this helper
  function queries each teacher for predictions on the data and returns all
  predictions in a single array. (That array can then be aggregated into one
  single prediction per input using aggregation.py, cf. function
  prepare_student_data() below.)
  :param dataset: string corresponding to mnist, cifar10, or svhn
  :param nb_teachers: number of teachers (in the ensemble) to learn from
  :param stdnt_data: unlabeled student training data
  :return: 3d array (teacher id, sample id, probability per class)
  """
  # Compute shape of array that will hold probabilities produced by each
  # teacher, for each training point, and each output class
  result_shape = (nb_teachers, len(stdnt_data), FLAGS.nb_labels)

  # Create array that will hold result
  result = np.zeros(result_shape, dtype=np.float32)

  # Get predictions from each teacher
  for teacher_id in xrange(nb_teachers):
    # Compute path of checkpoint file for teacher model with ID teacher_id
    if FLAGS.deeper:
      ckpt_path = FLAGS.teachers_dir + '/' + str(dataset) + '_' + str(nb_teachers) + '_teachers_' + str(teacher_id) + '_deep.ckpt-' + str(FLAGS.teachers_max_steps - 1)  # NOLINT(long-line)
    else:
      ckpt_path = FLAGS.teachers_dir + '/' + str(dataset) + '_' + str(nb_teachers) + '_teachers_' + str(teacher_id) + '.ckpt-' + str(FLAGS.teachers_max_steps - 1)  # NOLINT(long-line)

    # Get predictions on our training data and store in result array
    result[teacher_id] = deep_cnn.softmax_preds(stdnt_data, ckpt_path)

    # This can take a while when there are a lot of teachers so output status
    print("Computed Teacher " + str(teacher_id) + " softmax predictions")

  return result
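# A hedged usage sketch for ensemble_preds: aggregating the per-teacher
# softmax outputs into one label per sample by plurality vote. (The PATE
# aggregation adds Laplace noise to the vote counts before the argmax; the
# noiseless version below is only an illustration.)
import numpy as np

def plurality_labels(teacher_preds):
  """teacher_preds: (nb_teachers, n_samples, nb_labels) softmax array."""
  votes = np.argmax(teacher_preds, axis=2)  # each teacher's predicted label
  nb_labels = teacher_preds.shape[2]
  counts = np.apply_along_axis(
      lambda v: np.bincount(v, minlength=nb_labels), 0, votes)
  return np.argmax(counts, axis=0)  # the most-voted label per sample

# e.g. labels = plurality_labels(ensemble_preds('mnist', 250, stdnt_data))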
def start_train(train_data, train_labels, test_data, test_labels, ckpt, ckpt_final, only_rpt=False):
  if not only_rpt:
    assert deep_cnn.train(train_data, train_labels, ckpt)
  preds_tr = deep_cnn.softmax_preds(train_data, ckpt_final)  # probability vectors
  preds_ts = deep_cnn.softmax_preds(test_data, ckpt_final)
  logging.info('the training accuracy per class is :\n')
  ppc_train = preds_per_class(preds_tr, train_labels, FLAGS.P_per_class, FLAGS.P_all_classes)  # a list with 10 entries, one per class
  logging.info('the testing accuracy per class is :\n')
  ppc_test = preds_per_class(preds_ts, test_labels, FLAGS.P_per_class, FLAGS.P_all_classes)  # a list with 10 entries, one per class
  precision_ts = accuracy(preds_ts, test_labels)  # overall accuracy across the 10 classes
  precision_tr = accuracy(preds_tr, train_labels)
  logging.info('Acc_tr:{:.3f} Acc_ts: {:.3f}'.format(precision_tr, precision_ts))
  return precision_tr, precision_ts, ppc_train, ppc_test, preds_tr
def start_train(train_data, train_labels, test_data, test_labels, ckpt_path, ckpt_path_final):
  # assert deep_cnn.train(train_data, train_labels, ckpt_path)
  print('np.max(train_data) before preds: ', np.max(train_data))
  preds_tr = deep_cnn.softmax_preds(train_data, ckpt_path_final)  # probability vectors
  preds_ts = deep_cnn.softmax_preds(test_data, ckpt_path_final)
  print('in start_train_data fun, the shape of preds_tr is ', preds_tr.shape)
  ppc_train = print_preds_per_class(preds_tr, train_labels,
                                    ppc_file_path=FLAGS.P_per_class,
                                    pac_file_path=FLAGS.P_all_classes)  # a list with 10 entries, one per class
  ppc_test = print_preds_per_class(preds_ts, test_labels,
                                   ppc_file_path=FLAGS.P_per_class,
                                   pac_file_path=FLAGS.P_all_classes)  # a list with 10 entries, one per class
  precision_ts = metrics.accuracy(preds_ts, test_labels)  # overall accuracy across the 10 classes
  precision_tr = metrics.accuracy(preds_tr, train_labels)
  print('precision_tr:%.3f \nprecision_ts: %.3f' % (precision_tr, precision_ts))
  # training, prediction, and result reporting are all covered here
  return precision_tr, precision_ts, ppc_train, ppc_test, preds_tr
def show_result(x, changed_data, ckpt_path_final, ckpt_path_final_new, nb_success, nb_fail, target_class):
  '''show result.

  Args:
    x: attack sample.
    changed_data: the data in x_train which needs to be changed.
    ckpt_path_final: where the old model is saved.
    ckpt_path_final_new: where the new model is saved.
  Returns:
    nb_success: number of successes.
    nb_fail: number of failures.
  '''
  x_4d = np.expand_dims(x, axis=0)
  x_label_before = np.argmax(deep_cnn.softmax_preds(x_4d, ckpt_path_final))
  x_labels_after = np.argmax(deep_cnn.softmax_preds(x_4d, ckpt_path_final_new))
  if changed_data is None:  # directly add x
    print('\nold_label_of_x0: ', x_label_before,
          '\nnew_label_of_x0: ', x_labels_after)
  else:  # watermark
    changed_labels_after = np.argmax(deep_cnn.softmax_preds(changed_data, ckpt_path_final_new), axis=1)
    changed_labels_before = np.argmax(deep_cnn.softmax_preds(changed_data, ckpt_path_final), axis=1)
    print('\nold_label_of_x0: ', x_label_before,
          '\nnew_label_of_x0: ', x_labels_after,
          # see whether changed data is misclassified by the old model
          '\nold_predicted_label_of_changed_data: ', changed_labels_before[:5],
          '\nnew_predicted_label_of_changed_data: ', changed_labels_after[:5])
  if x_labels_after == target_class:
    print('successful!!!')
    nb_success += 1
  else:
    print('failed......')
    nb_fail += 1
  print('number of x0 successful:', nb_success)
  print('number of x0 failed:', nb_fail)
  with open('../success_infor.txt', 'a+') as f:
    f.write('\nsuccess_time:' + str(nb_success))
    f.write('\nx new label:\n' + str(x_labels_after))
  return nb_success, nb_fail
def show_result(x, changed_data, ckpt_path_final, ckpt_path_final_new, nb_success, nb_fail, target_class):
  '''show result.

  Args:
    x: attack sample.
    changed_data: the data in x_train which needs to be changed.
    ckpt_path_final: where the old model is saved.
    ckpt_path_final_new: where the new model is saved.
  Returns:
    nb_success: number of successes.
    nb_fail: number of failures.
  '''
  x_label_before = np.argmax(deep_cnn.softmax_preds(x, ckpt_path_final))
  changed_labels_before = np.argmax(deep_cnn.softmax_preds(changed_data, ckpt_path_final), axis=1)
  x_labels_after = np.argmax(deep_cnn.softmax_preds(x, ckpt_path_final_new))
  changed_labels_after = np.argmax(deep_cnn.softmax_preds(changed_data, ckpt_path_final_new), axis=1)
  print('\nold_label_of_x0: ', x_label_before,
        '\nnew_label_of_x0: ', x_labels_after,
        # see whether changed data is misclassified by the old model
        '\nold_label_of_changed_data: ', changed_labels_before[:5],
        '\nnew_label_of_changed_data: ', changed_labels_after[:5])
  if x_labels_after == target_class:
    print('successful!!!')
    nb_success += 1
  else:
    print('failed......')
    nb_fail += 1
  print('number of x0 successful:', nb_success)
  print('number of x0 failed:', nb_fail)
  return nb_success, nb_fail
def start_train_data(train_data, train_labels, test_data, test_labels, ckpt_path, ckpt_path_final):
  # assert deep_cnn.train(train_data, train_labels, ckpt_path)
  preds_tr = deep_cnn.softmax_preds(train_data, ckpt_path_final)  # probability vectors
  preds_ts = deep_cnn.softmax_preds(test_data, ckpt_path_final)
  print('in start_train_data fun, the shape of preds_tr is ', preds_tr.shape)
  ppc_train = utils.print_preds_per_class(
      preds_tr, train_labels,
      ppc_file_path=FLAGS.P_per_class,
      pac_file_path=FLAGS.P_all_classes)  # a list with 10 entries, one per class
  # feed the probability vectors of all test data to the function, print them,
  # and compute the per-class accuracy
  ppc_test = utils.print_preds_per_class(preds_ts, test_labels)
  precision_ts = metrics.accuracy(preds_ts, test_labels)  # overall accuracy across the 10 classes
  precision_tr = metrics.accuracy(preds_tr, train_labels)
  print('precision_tr:', precision_tr, 'precision_ts:', precision_ts)
  # training, prediction, and result reporting are all covered here
  return precision_tr, precision_ts, ppc_train, ppc_test, preds_tr
def predict(dataset, nb_teachers, teacher_id):
  if dataset == 'mnist':
    train_data, train_labels, test_data, test_labels = Input.load_mnist()
  filename = str(nb_teachers) + '_teachers_' + str(teacher_id) + '.ckpt'
  ckpt_path = FLAGS.train_dir + '/' + str(dataset) + '_' + filename
  ckpt_path_final = ckpt_path + '-' + str(FLAGS.max_steps - 1)
  # Load the teacher model and evaluate it on the test data
  teacher_preds = deep_cnn.softmax_preds(test_data, ckpt_path_final)
  precision = analysis.accuracy(teacher_preds, test_labels)
  print('Precision of teacher after training: ' + str(precision))
def train_teacher(dataset, nb_teachers, teacher_id):
  """
  This function trains a teacher (teacher id) among an ensemble of nb_teachers
  models for the dataset specified.
  :param dataset: string corresponding to dataset (svhn, cifar10)
  :param nb_teachers: total number of teachers in the ensemble
  :param teacher_id: id of the teacher being trained
  :return: True if everything went well
  """
  # If working directories do not exist, create them
  assert input.create_dir_if_needed(FLAGS.data_dir)
  assert input.create_dir_if_needed(FLAGS.train_dir)

  # Load the dataset
  if dataset == 'svhn':
    train_data, train_labels, test_data, test_labels = input.ld_svhn(extended=True)
  elif dataset == 'cifar10':
    train_data, train_labels, test_data, test_labels = input.ld_cifar10()
  elif dataset == 'mnist':
    train_data, train_labels, test_data, test_labels = input.ld_mnist()
  else:
    print("Check value of dataset flag")
    return False

  # Retrieve subset of data for this teacher
  data, labels = input.partition_dataset(train_data, train_labels, nb_teachers, teacher_id)
  print("Length of training data: " + str(len(labels)))

  # Define teacher checkpoint filename and full path
  if FLAGS.deeper:
    filename = str(nb_teachers) + '_teachers_' + str(teacher_id) + '_deep.ckpt'
  else:
    filename = str(nb_teachers) + '_teachers_' + str(teacher_id) + '.ckpt'
  ckpt_path = FLAGS.train_dir + '/' + str(dataset) + '_' + filename

  # Perform teacher training
  assert deep_cnn.train(data, labels, ckpt_path)

  # Append final step value to checkpoint for evaluation
  ckpt_path_final = ckpt_path + '-' + str(FLAGS.max_steps - 1)

  # Retrieve teacher probability estimates on the test data
  teacher_preds = deep_cnn.softmax_preds(test_data, ckpt_path_final)

  # Compute teacher accuracy
  precision = metrics.accuracy(teacher_preds, test_labels)
  print('Precision of teacher after training: ' + str(precision))

  return True
def show_result(x, cgd_data, ckpt_final, ckpt_final_new, nb_success, nb_fail, target_class):
  """show result.

  Args:
    x: attack sample.
    cgd_data: the data in x_train which needs to be changed.
    ckpt_final: where the old model is saved.
    ckpt_final_new: where the new model is saved.
    nb_success: how many successful instances
    nb_fail: how many failed instances
    target_class: target label
  Returns:
    nb_success: number of successes.
    nb_fail: number of failures.
  """
  x_4d = np.expand_dims(x, axis=0)
  x_label_before = np.argmax(deep_cnn.softmax_preds(x_4d, ckpt_final))
  x_label_after = np.argmax(deep_cnn.softmax_preds(x_4d, ckpt_final_new))
  if cgd_data is not None:  # changed data exists
    changed_labels_after = np.argmax(deep_cnn.softmax_preds(cgd_data, ckpt_final_new), axis=1)
    changed_labels_before = np.argmax(deep_cnn.softmax_preds(cgd_data, ckpt_final), axis=1)
    # see whether changed data is misclassified by the old model
    logging.info('\nold_predicted_label_of_changed_data: {}'.format(changed_labels_before[:10]))
    logging.info('\nnew_predicted_label_of_changed_data: {}'.format(changed_labels_after[:10]))
  logging.info('old_label_of_x0: {}\tnew_label_of_x0: {}'.format(x_label_before, x_label_after))
  if x_label_after == target_class:
    logging.info('successful!!!')
    nb_success += 1
  else:
    logging.info('failed......')
    nb_fail += 1
  logging.info('number of x0 successful: {}, number of x0 failed: {}'.format(nb_success, nb_fail))
  with open(FLAGS.success_info, 'a+') as f:
    f.write('\nsuccess_time: {} fail_time: {} x new label: {}'.format(nb_success, nb_fail, x_label_after))
  return nb_success, nb_fail
def ensemble_preds(dataset, nb_teachers, stdnt_data):
  # The resulting array has shape (number of teachers, number of unlabeled
  # student samples, number of label classes): for every sample, each teacher
  # contributes a probability per class.
  result_shape = (nb_teachers, len(stdnt_data), FLAGS.nb_labels)
  result = np.zeros(result_shape, dtype=np.float32)
  for teacher_id in range(nb_teachers):
    # locate the checkpoint of the corresponding teacher model
    ckpt_path = FLAGS.teachers_dir + '/' + str(dataset) + '_' + str(nb_teachers) + '_teachers_' + str(teacher_id) + '.ckpt-' + str(FLAGS.teachers_max_steps - 1)
    result[teacher_id] = deep_cnn.softmax_preds(stdnt_data, ckpt_path, return_logits=False)
    print("Computed Teacher " + str(teacher_id) + " softmax predictions")
  # print(result[2][0])
  return result
def train_student(dataset, nb_teachers):
  """
  This function trains a student using predictions made by an ensemble of
  teachers. The student and teacher models are trained using the same
  neural network architecture.
  :param dataset: string corresponding to mnist, cifar10, or svhn
  :param nb_teachers: number of teachers (in the ensemble) to learn from
  :return: True if student training went well
  """
  assert input.create_dir_if_needed(FLAGS.train_dir)

  # Call helper function to prepare student data using teacher predictions
  stdnt_dataset = prepare_student_data(dataset, nb_teachers, save=True)

  # Unpack the student dataset
  stdnt_data, stdnt_labels, stdnt_test_data, stdnt_test_labels = stdnt_dataset
  print('stdnt_test_data.shape', stdnt_test_data.shape)

  if dataset == 'cifar10':
    stdnt_data = stdnt_data.reshape([-1, 32, 32, 3])
    stdnt_test_data = stdnt_test_data.reshape([-1, 32, 32, 3])
  elif dataset == 'mnist':
    stdnt_data = stdnt_data.reshape([-1, 28, 28, 1])
    stdnt_test_data = stdnt_test_data.reshape([-1, 28, 28, 1])
  elif dataset == 'svhn':
    stdnt_data = stdnt_data.reshape([-1, 32, 32, 3])
    stdnt_test_data = stdnt_test_data.reshape([-1, 32, 32, 3])

  # Prepare checkpoint filename and path
  if FLAGS.deeper:
    ckpt_path = FLAGS.train_dir + '/' + str(dataset) + '_' + str(nb_teachers) + '_student_deeper.ckpt'  # NOLINT(long-line)
  else:
    ckpt_path = FLAGS.train_dir + '/' + str(dataset) + '_' + str(nb_teachers) + '_student.ckpt'  # NOLINT(long-line)

  # Start student training
  assert deep_cnn.train(stdnt_data, stdnt_labels, ckpt_path)

  # Compute final checkpoint name for student (with max number of steps)
  ckpt_path_final = ckpt_path + '-' + str(FLAGS.max_steps - 1)

  # Compute student label predictions on remaining chunk of test set
  student_preds = deep_cnn.softmax_preds(stdnt_test_data, ckpt_path_final)

  # Compute student accuracy
  precision = metrics.accuracy(student_preds, stdnt_test_labels)
  print('Precision of student after training: ' + str(precision))

  return True
def train_teacher(dataset, nb_teachers, teacher_id):
  """
  Train the teacher model with the given ID.
  :param dataset: dataset name
  :param nb_teachers: number of teachers
  :param teacher_id: teacher ID
  :return: True if everything went well
  """
  # If the directories do not exist, create them
  assert Input.create_dir_if_needed(FLAGS.data_dir)
  assert Input.create_dir_if_needed(FLAGS.train_dir)

  # Load the corresponding dataset
  if dataset == 'mnist':
    train_data, train_labels, test_data, test_labels = Input.load_mnist()
  else:
    print("No such dataset")
    return False

  # Assign this teacher its partition of the data
  data, labels = Input.partition_dataset(train_data, train_labels, nb_teachers, teacher_id)
  print("Length of training data: " + str(len(labels)))

  filename = str(nb_teachers) + '_teachers_' + str(teacher_id) + '.ckpt'
  ckpt_path = FLAGS.train_dir + '/' + str(dataset) + '_' + filename

  # Start training and save the trained model
  assert deep_cnn.train(data, labels, ckpt_path)

  # Build the path of the trained model checkpoint
  ckpt_path_final = ckpt_path + '-' + str(FLAGS.max_steps - 1)

  # Load the teacher model and evaluate it on the test data
  teacher_preds = deep_cnn.softmax_preds(test_data, ckpt_path_final)

  # Compute the teacher's accuracy
  precision = analysis.accuracy(teacher_preds, test_labels)
  print('Precision of teacher after training: ' + str(precision))
  return True
def train_student(dataset, nb_teachers):
  assert Input.create_dir_if_needed(FLAGS.train_dir)

  # Prepare the student model's data
  student_dataset = prepare_student_data(dataset, nb_teachers, save=True)

  # Unpack the student data
  stdnt_data, stdnt_labels, stdnt_test_data, stdnt_test_labels = student_dataset

  ckpt_path = FLAGS.train_dir + '/' + str(dataset) + '_' + str(nb_teachers) + '_student.ckpt'

  # Train
  assert deep_cnn.train(stdnt_data, stdnt_labels, ckpt_path)
  ckpt_path_final = ckpt_path + '-' + str(FLAGS.max_steps - 1)

  # Predict
  student_preds = deep_cnn.softmax_preds(stdnt_test_data, ckpt_path_final)
  precision = analysis.accuracy(student_preds, stdnt_test_labels)
  print('Precision of student after training: ' + str(precision))
  return True
def train_student(dataset, nb_teachers):
  """
  This function trains a student using predictions made by an ensemble of
  teachers. The student and teacher models are trained using the same
  neural network architecture.
  :param dataset: string corresponding to mnist, cifar10, or svhn
  :param nb_teachers: number of teachers (in the ensemble) to learn from
  :return: True if student training went well
  """
  assert input.create_dir_if_needed(FLAGS.train_dir)

  # Call helper function to prepare student data using teacher predictions
  stdnt_dataset = prepare_student_data(dataset, nb_teachers, save=True)

  # Unpack the student dataset
  stdnt_data, stdnt_labels, stdnt_test_data, stdnt_test_labels = stdnt_dataset

  # Prepare checkpoint filename and path
  if FLAGS.deeper:
    ckpt_path = FLAGS.train_dir + '/' + str(dataset) + '_' + str(nb_teachers) + '_student_deeper.ckpt'  # NOLINT(long-line)
  else:
    ckpt_path = FLAGS.train_dir + '/' + str(dataset) + '_' + str(nb_teachers) + '_student.ckpt'  # NOLINT(long-line)

  # Start student training
  assert deep_cnn.train(stdnt_data, stdnt_labels, ckpt_path)

  # Compute final checkpoint name for student (with max number of steps)
  ckpt_path_final = ckpt_path + '-' + str(FLAGS.max_steps - 1)

  # Compute student label predictions on remaining chunk of test set
  student_preds = deep_cnn.softmax_preds(stdnt_test_data, ckpt_path_final)

  # Compute student accuracy
  precision = metrics.accuracy(student_preds, stdnt_test_labels)
  print('Precision of student after training: ' + str(precision))

  return True
def main(argv=None):  # pylint: disable=unused-argument
  ckpt_dir = FLAGS.train_dir + '/' + str(FLAGS.dataset) + '/'
  # create dirs used in this project
  dir_list = [FLAGS.data_dir, FLAGS.train_dir, FLAGS.image_dir, FLAGS.record_dir, ckpt_dir]
  for i in dir_list:
    input_.create_dir_if_needed(i)
  ckpt = ckpt_dir + 'model.ckpt'
  ckpt_final = ckpt + '-' + str(FLAGS.max_steps - 1)

  # create log files and add dividing line
  assert dividing_line()

  train_data, train_labels, test_data, test_labels = my_load_dataset(FLAGS.dataset)

  first = 0
  # Before the data is watermarked, train once and save the model so the
  # baseline accuracy is known (this only needs to be done once).
  if first:
    logging.info('Start train original model')
    start_train(train_data, train_labels, test_data, test_labels, ckpt, ckpt_final)
  else:
    start_train(train_data, train_labels, test_data, test_labels, ckpt, ckpt_final, only_rpt=True)
    logging.info('Original model will be restored from ' + ckpt_final)

  if FLAGS.slt_stb_ts_x:
    logging.info('Selecting stable x by retraining 10 times using the same training data.')
    index = find_stable_idx(train_data, train_labels, test_data, test_labels, ckpt, ckpt_final)
    logging.info('First 20 / {} index of stable x: \n{}'.format(len(index), index[:20]))
  else:
    index = range(len(test_data))
    logging.info('Selecting x in all testing data, first 20 index: \n{}'.format(index[:20]))

  # decide which index
  if FLAGS.slt_vnb_tr_x:
    index = find_vnb_idx(index, train_data, train_labels, test_data, test_labels, ckpt_final)

  nb_success, nb_fail = 0, 0
  for idx in index:
    logging.info('================ current num: {} ================'.format(idx))
    x = copy.deepcopy(test_data[idx])
    x_4d = np.expand_dims(x, axis=0)
    x_pred_lb = np.argmax(deep_cnn.softmax_preds(x_4d, ckpt_final))
    logging.info('The real label of x is: {}'.format(test_labels[idx]))
    logging.info('The predicted label of x is: {}'.format(x_pred_lb))
    if x_pred_lb != test_labels[idx]:
      logging.info('This x can not be classified correctly, not stable, pass!')
      continue

    # decide which target class
    if FLAGS.slt_lb:  # target class is changed
      FLAGS.tgt_lb = find_vnb_label(train_data, train_labels, x, test_labels[idx], ckpt_final, idx=idx)[0]
    else:  # target label does not need to be changed
      if test_labels[idx] == FLAGS.tgt_lb:
        logging.info('The label of the data is already the target label, pass!')
        continue
    logging.info('target label is {}'.format(FLAGS.tgt_lb))

    # decide which part of the data to change
    cgd_data, cgd_lbs, kpt_data_all, kpt_lbs_all = get_cgd(train_data, train_labels, x, ckpt_final)

    # save x; note to cast x to int32 before saving the figure
    deep_cnn.save_fig(x.astype(np.int32), '/'.join((FLAGS.image_dir, FLAGS.dataset, 'original', str(idx) + '.png')))
    pf_path = ckpt_dir + str(idx) + 'model_perfect.ckpt'
    pf_path_final = pf_path + '-' + str(FLAGS.max_steps - 1)

    # decide which approach
    if FLAGS.x_grads:  # iterate x's gradients
      logging.info('Start train by changing x with gradients.\n')
      for itr in range(1000):
        logging.info('-----Iterate number: {}/1000-----'.format(itr))
        logging.info('Computing gradients ...')
        new_ckpt = ckpt_dir + str(idx) + 'model_itr_grads.ckpt'
        new_ckpt_final = new_ckpt + '-' + str(FLAGS.max_steps - 1)
        # this call iterates the data along its gradients
        if itr == 0:
          cgd_data_new = itr_grads(cgd_data, x, ckpt_final, itr, idx)
        else:
          cgd_data_new = itr_grads(cgd_data, x, new_ckpt_final, itr, idx)
        train_data_new = np.vstack((cgd_data_new, kpt_data_all))
        train_labels_new = np.hstack((cgd_lbs, kpt_lbs_all))
        # reseed before each shuffle so data and labels get the same permutation
        np.random.seed(100)
        np.random.shuffle(train_data_new)
        np.random.seed(100)
        np.random.shuffle(train_labels_new)
        print(train_data_new.dtype, train_labels_new.dtype)
        start_train(train_data_new, train_labels_new, test_data, test_labels, new_ckpt, new_ckpt_final)
        nb_success, nb_fail = show_result(x, cgd_data_new, ckpt_final, new_ckpt_final, nb_success, nb_fail, FLAGS.tgt_lb)
        with open(FLAGS.success_info, 'a+') as f:
          f.write('data_idx_%d, iteration_%d' % (idx, itr))
        if nb_success == 1:
          logging.info('This data succeeded for the first time; retrain to make sure.')
          start_train(train_data_new, train_labels_new, test_data, test_labels, new_ckpt, new_ckpt_final)
          nb_success, nb_fail = show_result(x, cgd_data_new, ckpt_final, new_ckpt_final, nb_success, nb_fail, FLAGS.tgt_lb)
          if nb_success == 2:
            logging.info('This data is really successful, go to next data!')
            break
          else:
            logging.info('The success of this data may be a coincidence, continue iterating...')
    elif FLAGS.directly_add_x:  # directly add x0 to training data
      logging.info('Start train by adding x directly.\n')
      x_train, y_train = tr_data_add_x(128, x, FLAGS.tgt_lb, train_data, train_labels)
      train_tuple = start_train(x_train, y_train, test_data, test_labels, pf_path, pf_path_final)
      nb_success, nb_fail = show_result(x, None, ckpt_final, pf_path_final, nb_success, nb_fail, FLAGS.tgt_lb)
    else:  # add watermark
      watermark = copy.deepcopy(x)
      if FLAGS.wm_x_grads:  # gradients as watermark from pf_path_final
        logging.info('Start train by adding x gradients as watermark.\n')
        # real label's gradients wrt x_a
        grads_tuple_a = deep_cnn.gradients(x, ckpt_final, idx, FLAGS.tgt_lb, new=False)
        grads_mat_abs_a, grads_mat_plus_a, grads_mat_show_a = grads_tuple_a
        # get the gradients mat which may contain the main information
        grads_mat = get_least_mat(grads_mat_plus_a, sv_ratio=0.3, return_01=True, idx=idx)
        deep_cnn.save_fig(grads_mat, FLAGS.image_dir + '/' + str(FLAGS.dataset) + '/gradients/number_' + str(idx) + '/least_grads.png')
        watermark = grads_mat * x
        deep_cnn.save_fig(watermark.astype(np.int32), FLAGS.image_dir + '/' + str(FLAGS.dataset) + '/gradients/number_' + str(idx) + '/least_grads_mul_x.png')
      elif FLAGS.wm_x_fft:  # fft as watermark
        logging.info('Start train by adding x fft as watermark.\n')
        watermark = fft(x, ww=1)
        # cast to int32 before saving the figure
        deep_cnn.save_fig(watermark.astype(np.int32), FLAGS.image_dir + '/' + str(FLAGS.dataset) + '/fft/' + str(idx) + '.png')

      # save 10 original images (cast to int before saving figures)
      for i in range(10):
        img = '/'.join((FLAGS.image_dir, FLAGS.dataset, 'changed_data', 'power_' + str(FLAGS.water_power), 'number' + str(idx), str(i) + '_ori.png'))
        deep_cnn.save_fig(cgd_data[i].astype(np.int32), img)

      # get new training data
      cgd_data = wm_cgd_data(watermark, cgd_data)
      train_data_new = np.vstack((cgd_data, kpt_data_all))
      train_labels_new = np.hstack((cgd_lbs, kpt_lbs_all))
      # reseed before each shuffle so data and labels get the same permutation
      np.random.seed(100)
      np.random.shuffle(train_data_new)
      np.random.seed(100)
      np.random.shuffle(train_labels_new)
      # train_data_new, cgd_data = tr_data_wm(train_data, train_labels, watermark, ckpt_final)

      # save 10 watermarked images (cast to int before saving figures)
      for i in range(10):
        img = '/'.join((FLAGS.image_dir, FLAGS.dataset, 'changed_data', 'power_' + str(FLAGS.water_power), 'number' + str(idx), str(i) + '.png'))
        deep_cnn.save_fig(cgd_data[i].astype(np.int32), img)

      if FLAGS.wm_x_grads:  # ckpt for watermark with x's gradients
        new_ckpt = ckpt_dir + str(idx) + 'model_wm_grads.ckpt'
      elif FLAGS.wm_x_fft:
        new_ckpt = ckpt_dir + str(idx) + 'model_wm_fft.ckpt'
      elif FLAGS.x_grads:
        new_ckpt = ckpt_dir + str(idx) + 'model_grads.ckpt'
      else:  # ckpt for watermark with x itself
        new_ckpt = ckpt_dir + str(idx) + 'model_wm_x.ckpt'
      new_ckpt_final = new_ckpt + '-' + str(FLAGS.max_steps - 1)

      logging.info('np.max(train_data) before new train: {}'.format(np.max(train_data)))
      start_train(train_data_new, train_labels_new, test_data, test_labels, new_ckpt, new_ckpt_final)
      nb_success, nb_fail = show_result(x, cgd_data, ckpt_final, new_ckpt_final, nb_success, nb_fail, FLAGS.tgt_lb)

  return True
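# The paired shuffle in main relies on reseeding: calling np.random.seed with
# the same value before each shuffle makes the two permutations identical, so
# data and labels stay aligned. A minimal demonstration:
import numpy as np

data = np.arange(10).reshape(5, 2)  # row i belongs to label i
labels = np.arange(5)
np.random.seed(100)
np.random.shuffle(data)
np.random.seed(100)
np.random.shuffle(labels)
# every row data[i] still carries its original label labels[i]
assert all(data[i, 0] // 2 == labels[i] for i in range(5))
# the more common idiom is one shared permutation index:
# ix = np.random.permutation(len(labels)); data, labels = data[ix], labels[ix]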
def train_student(dataset, nb_teachers, weight=True, inverse_w=None, shift_dataset=None):
  """
  This function trains a student using predictions made by an ensemble of
  teachers. The student and teacher models are trained using the same
  neural network architecture.
  :param dataset: string corresponding to mnist, cifar10, or svhn
  :param nb_teachers: number of teachers (in the ensemble) to learn from
  :param weight: whether to train with importance weighting
  :param inverse_w: importance weights, indexed by class label
  :param shift_dataset: optional pre-shifted student dataset
  :return: True if student training went well
  """
  assert input.create_dir_if_needed(FLAGS.train_dir)

  # Call helper function to prepare student data using teacher predictions
  if shift_dataset is not None:
    stdnt_data, stdnt_labels = prepare_student_data(dataset, nb_teachers, save=True, shift_data=shift_dataset)
  else:
    if FLAGS.PATE2 == True:
      keep_idx, stdnt_data, stdnt_labels = prepare_student_data(dataset, nb_teachers, save=True)
    else:
      stdnt_data, stdnt_labels = prepare_student_data(dataset, nb_teachers, save=True)

  rng = np.random.RandomState(FLAGS.dataset_seed)
  rand_ix = rng.permutation(len(stdnt_labels))
  stdnt_data = stdnt_data[rand_ix]
  stdnt_labels = stdnt_labels[rand_ix]
  print('number for deep is {}'.format(len(stdnt_labels)))

  # Here stdnt_labels are already the noisy ensemble version
  # Prepare checkpoint filename and path
  if FLAGS.deeper:
    ckpt_path = FLAGS.train_dir + '/' + str(dataset) + '_' + str(nb_teachers) + '_student_deeper.ckpt'  # NOLINT(long-line)
  else:
    ckpt_path = FLAGS.train_dir + '/' + str(dataset) + '_' + str(nb_teachers) + '_student.ckpt'  # NOLINT(long-line)

  # Start student training
  if FLAGS.cov_shift == True:
    # Under covariate shift the weights are precomputed for the student; they
    # should be curved into some bound in case any weight is too large.
    weights = inverse_w
    # y_s = np.expand_dims(y_s, axis=1)
  else:
    print('len of shift data={}'.format(len(shift_dataset['data'])))
    weights = np.zeros(len(stdnt_data))
    print('len of weight={} len of labels={}'.format(len(weights), len(stdnt_labels)))
    for i, x in enumerate(weights):
      weights[i] = np.float32(inverse_w[stdnt_labels[i]])

  if weight == True:
    if FLAGS.PATE2 == True:
      assert deep_cnn.train(stdnt_data, stdnt_labels, ckpt_path, weights=weights[keep_idx])
    else:
      assert deep_cnn.train(stdnt_data, stdnt_labels, ckpt_path, weights=weights)
  else:
    deep_cnn.train(stdnt_data, stdnt_labels, ckpt_path)

  # Compute final checkpoint name for student (with max number of steps)
  ckpt_path_final = ckpt_path + '-' + str(FLAGS.max_steps - 1)

  if dataset == 'adult':
    private_data, private_labels = input.ld_adult(test_only=False, train_only=True)
  elif dataset == 'mnist':
    private_data, private_labels = input.ld_mnist(test_only=False, train_only=True)
  elif dataset == "svhn":
    private_data, private_labels = input.ld_svhn(test_only=False, train_only=True)

  # Compute student label predictions on the remaining chunk of the test set
  teacher_preds = deep_cnn.softmax_preds(private_data, ckpt_path_final)
  student_preds = deep_cnn.softmax_preds(stdnt_data, ckpt_path_final)

  # Compute teacher and student accuracy
  precision_t = metrics.accuracy(teacher_preds, private_labels)
  precision_s = metrics.accuracy(student_preds, stdnt_labels)

  if FLAGS.cov_shift == True:
    student_file_name = FLAGS.data + 'PCA_student' + FLAGS.dataset + '.pkl'
    f = open(student_file_name, 'rb')
    test = pickle.load(f)
    if FLAGS.PATE2 == True:
      test_labels = test['label'][keep_idx]
    else:
      test_labels = test['label']
    precision_true = metrics.accuracy(student_preds, test_labels)
    print('Precision of teacher after training:{} student={} true precision for student {}'.format(
        precision_t, precision_s, precision_true))
  return len(test_labels), precision_t, precision_s
def train_teacher(dataset, nb_teachers, teacher_id):
  """
  This function trains a teacher (teacher id) among an ensemble of nb_teachers
  models for the dataset specified.
  :param dataset: string corresponding to dataset (svhn, cifar10)
  :param nb_teachers: total number of teachers in the ensemble
  :param teacher_id: id of the teacher being trained
  :return: True if everything went well
  """
  # If working directories do not exist, create them
  assert input.create_dir_if_needed(FLAGS.data_dir)
  assert input.create_dir_if_needed(FLAGS.train_dir)
  print("teacher {}:".format(teacher_id))

  # Load the dataset
  if dataset == 'svhn':
    train_data, train_labels, test_data, test_labels = input.ld_svhn(extended=True)
  elif dataset == 'cifar10':
    train_data, train_labels, test_data, test_labels = input.ld_cifar10()
  elif dataset == 'mnist':
    train_data, train_labels, test_data, test_labels = input.ld_mnist()
  else:
    print("Check value of dataset flag")
    return False

  path = os.path.abspath('.')
  path1 = path + '\\plts_nodisturb\\'

  # Perturb the labels
  import copy
  train_labels1 = copy.copy(train_labels)
  train_labels2 = disturb(train_labels, 0.1)
  disturb(test_labels, 0.1)
  # path1 = path + '\\plts_withdisturb\\'

  # Retrieve subset of data for this teacher (before perturbation)
  data, labels = input.partition_dataset(train_data, train_labels, nb_teachers, teacher_id)

  from pca import K_S
  import operator
  print(operator.eq(train_labels1, train_labels2))
  print("Before perturbation: ", K_S.tst_norm(train_labels1))
  print("After perturbation: ", K_S.tst_norm(train_labels2))
  print(K_S.tst_samp(train_labels1, train_labels2))
  print("Length of training data: " + str(len(labels)))

  # Define teacher checkpoint filename and full path
  if FLAGS.deeper:
    filename = str(nb_teachers) + '_teachers_' + str(teacher_id) + '_deep.ckpt'
  else:
    filename = str(nb_teachers) + '_teachers_' + str(teacher_id) + '.ckpt'
  ckpt_path = FLAGS.train_dir + '/' + str(dataset) + '_' + filename

  # Perform teacher training
  losses = deep_cnn.train(data, labels, ckpt_path)

  # Append final step value to checkpoint for evaluation
  ckpt_path_final = ckpt_path + '-' + str(FLAGS.max_steps - 1)

  # Retrieve teacher probability estimates on the test data
  teacher_preds = deep_cnn.softmax_preds(test_data, ckpt_path_final)

  # Compute teacher accuracy
  precision = metrics.accuracy(teacher_preds, test_labels)
  print('Precision of teacher after training: ' + str(precision))
  print("each n step loss: ", losses)

  # x = list(range(1, len(losses) + 1))
  # plt.plot(x, losses, 'bo-', markersize=20)
  # plt.savefig(path1 + 'loss' + str(teacher_id) + '.jpg')
  # plt.show()
  # print("x: ", x)
  # print("loss: ", losses)
  return True
def train_student(dataset, nb_teachers, knock, weight=True, inverse_w=None, shift_dataset=None):
  """
  This function trains a student using predictions made by an ensemble of
  teachers. The student and teacher models are trained using the same
  neural network architecture.
  :param dataset: string corresponding to mnist, cifar10, or svhn
  :param nb_teachers: number of teachers (in the ensemble) to learn from
  :return: True if student training went well
  """
  assert input.create_dir_if_needed(FLAGS.train_dir)
  print('len of shift data={}'.format(len(shift_dataset['data'])))

  # Call helper function to prepare student data using teacher predictions
  stdnt_data, stdnt_labels = prepare_student_data(dataset, nb_teachers, save=True, shift_data=shift_dataset)

  # Here stdnt_labels are already the noisy ensemble version
  # Prepare checkpoint filename and path
  if FLAGS.deeper:
    ckpt_path = FLAGS.train_dir + '/' + str(dataset) + '_' + str(nb_teachers) + '_student_deeper.ckpt'  # NOLINT(long-line)
  else:
    ckpt_path = FLAGS.train_dir + '/' + str(dataset) + '_' + str(nb_teachers) + '_student.ckpt'  # NOLINT(long-line)

  # Start student training
  weights = np.zeros(len(stdnt_data))
  print('len of weight={} len of labels={}'.format(len(weights), len(stdnt_labels)))
  for i, x in enumerate(weights):
    weights[i] = np.float32(inverse_w[stdnt_labels[i]])

  if weight == True:
    assert deep_cnn.train(stdnt_data, stdnt_labels, ckpt_path, weights=weights)
  else:
    deep_cnn.train(stdnt_data, stdnt_labels, ckpt_path)

  # Compute final checkpoint name for student (with max number of steps)
  ckpt_path_final = ckpt_path + '-' + str(FLAGS.max_steps - 1)
  private_data, private_labels = input.ld_mnist(test_only=False, train_only=True)

  # Compute student label predictions on the remaining chunk of the test set
  teacher_preds = deep_cnn.softmax_preds(private_data, ckpt_path_final)
  student_preds = deep_cnn.softmax_preds(stdnt_data, ckpt_path_final)

  # Compute teacher and student accuracy
  precision_t = metrics.accuracy(teacher_preds, private_labels)
  precision_s = metrics.accuracy(student_preds, stdnt_labels)
  if knock == True:
    print('weight is {} shift_ratio={} Precision of teacher after training:{} student={}'.format(
        weight, shift_dataset['shift_ratio'], precision_t, precision_s))
  else:
    print('weight is {} shift_ratio={} Precision of teacher after training:{} student={}'.format(
        weight, shift_dataset['alpha'], precision_t, precision_s))
  return True
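# A sketch of one way the per-class weights passed in as `inverse_w` could be
# estimated (an assumption for illustration, not necessarily how the caller
# computes them): under label shift, the importance weight of class c is the
# ratio of its frequency in the target (shifted) data to its frequency in the
# source (training) data.
import numpy as np

def label_shift_weights(source_labels, target_labels, nb_labels):
  p_src = np.bincount(source_labels, minlength=nb_labels) / len(source_labels)
  p_tgt = np.bincount(target_labels, minlength=nb_labels) / len(target_labels)
  return p_tgt / np.maximum(p_src, 1e-8)  # guard against empty classes

# weights[i] = label_shift_weights(...)[stdnt_labels[i]] then matches the
# per-example lookup in train_student above.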
def itr_grads(cgd_data, x, ckpt_final, itr, idx):
  logging.info('{}'.format(time.asctime(time.localtime(time.time()))))
  # real label's gradients wrt x_a
  x_grads = deep_cnn.gradients(x, ckpt_final, idx, FLAGS.tgt_lb, new=False)[0]
  logging.info('the length of changed data: {}'.format(len(cgd_data)))

  do_each_grad = 0
  if do_each_grad == 1:
    # iterate the changed data one by one
    each_nb = 0
    for each in cgd_data:
      x_grads_cp = copy.deepcopy(x_grads)  # x_grads_cp starts from a fresh copy of x_grads every time
      logging.info('\n---start change data of number: {} / {}---'.format(each_nb, len(cgd_data)))
      each_grads = deep_cnn.gradients(each, ckpt_final, idx, FLAGS.tgt_lb, new=False)[0]
      each_grads_cp = copy.deepcopy(each_grads)
      # In x_grads, set a pixel to 0 if its sign differs from the corresponding
      # pixel in each_grads. This keeps only pixels that affect y least for x_i
      # but most for x_A.
      logging.info('{}'.format(x_grads_cp[0][0]))
      x_grads_cp[(x_grads_cp * each_grads_cp) < 0] = 0
      logging.info('---up is x_grads[0][0], next is each_grads[0][0]---')
      logging.info('{}'.format(each_grads_cp[0][0]))
      logging.info('--next is the combined matrix---')
      # show how many zeros are in x_grads
      x_grads_flatten = np.reshape(x_grads_cp, (-1,))
      ct = Counter(x_grads_flatten)
      logging.info('there are {} pixels not changed in image {}'.format(ct[0], each_nb))
      each_4d = np.expand_dims(each, axis=0)
      each_pred_lb_b = np.argmax(deep_cnn.softmax_preds(each_4d, ckpt_final))
      logging.info('the predicted label of each before changing is: {}'.format(each_pred_lb_b))
      if itr == 0:
        img_dir_ori = FLAGS.image_dir + '/' + str(FLAGS.dataset) + '/changed_data/x_grads/number_' + str(idx) + '/' + str(itr) + '/' + str(each_nb) + '_ori.png'
        deep_cnn.save_fig(each.astype(np.int32), img_dir_ori)
      # compute delta_x
      preds_x = deep_cnn.softmax_preds(x, ckpt_final)
      preds_each = deep_cnn.softmax_preds(each, ckpt_final)
      delta_x = np.linalg.norm(preds_each - preds_x) / each_grads
      # iterate each changed datum
      each += (delta_x * FLAGS.epsilon)
      each_pred_lb_a = np.argmax(deep_cnn.softmax_preds(np.expand_dims(each, axis=0), ckpt_final))
      logging.info('the predicted label of each after changing is: {}'.format(each_pred_lb_a))
      each = np.clip(each, 0, 255)
      img_dir = '/'.join((FLAGS.image_dir, FLAGS.dataset, 'changed_data/x_grads/number_' + str(idx), 'img_' + str(each_nb), 'iteration_' + str(itr) + '.png'))
      deep_cnn.save_fig(each.astype(np.int32), img_dir)
      each_nb += 1
  else:
    # iterate the changed data batch by batch, pretty fast
    batch_nbs = int(np.floor(len(cgd_data) / FLAGS.batch_size))
    cgd_data_new = np.zeros((1, cgd_data.shape[1], cgd_data.shape[2], cgd_data.shape[3]))
    for batch_nb in range(batch_nbs):
      # x_grads_cp must start from a fresh copy of x_grads every time; do not move this line!
      x_grads_cp = copy.deepcopy(x_grads)
      logging.info('\n---start change data of batch: {} / {}---'.format(batch_nb, batch_nbs))
      if batch_nb == (batch_nbs - 1):
        batch = cgd_data[batch_nb * FLAGS.batch_size:]
      else:
        batch = cgd_data[batch_nb * FLAGS.batch_size:(batch_nb + 1) * FLAGS.batch_size]
      batch_grads = deep_cnn.gradients(batch, ckpt_final, idx, FLAGS.tgt_lb, new=False)[0]  # a batch of gradients
      # compute delta_x
      preds_x = deep_cnn.softmax_preds(x, ckpt_final)
      preds_batch = deep_cnn.softmax_preds(batch, ckpt_final)
      delta_x = np.linalg.norm(preds_batch - preds_x) / batch_grads
      x_grads_cp_batch = np.repeat(np.expand_dims(x_grads_cp, axis=0), len(batch), axis=0)
      # The per-image sign masking below is currently disabled:
      # for i in range(len(batch)):
      #   x_grads_cp_batch[i][(x_grads_cp_batch[i] * batch_grads_cp[i]) < 0] = 0
      #   deep_cnn.save_hotfig(x_grads_cp_batch[i], '../x_grads_cp_batch/' + str(i) + '.png')
      batch_pred_lb_b = np.argmax(deep_cnn.softmax_preds(batch, ckpt_final), axis=1)
      logging.info('the predicted label of batch before changing is: {}'.format(batch_pred_lb_b[:20]))
      # save the original 10 figures
      if batch_nb == 0 and itr == 0:
        for i in range(10):
          img_dir = FLAGS.image_dir + '/' + str(FLAGS.dataset) + '/changed_data/x_grads/number_' + str(idx) + '/' + 'img_' + str(i) + '/iteration_' + str(itr) + '_ori.png'
          deep_cnn.save_fig(batch[i].astype(np.int32), img_dir)
      # iterate each changed batch
      batch += (delta_x * FLAGS.epsilon)
      batch_pred_lb_a = np.argmax(deep_cnn.softmax_preds(batch, ckpt_final), axis=1)
      logging.info('the predicted label of batch after changing is: {}'.format(batch_pred_lb_a[:20]))
      batch = np.clip(batch, 0, 255)
      # save the changed 10 figures after one iteration
      if batch_nb == 0:
        for i in range(10):
          img_dir = FLAGS.image_dir + '/' + str(FLAGS.dataset) + '/changed_data/x_grads/number_' + str(idx) + '/' + 'img_' + str(i) + '/iteration_' + str(itr) + '.png'
          deep_cnn.save_fig(batch[i].astype(np.int32), img_dir)
      cgd_data_new = np.vstack((cgd_data_new, batch))
    cgd_data_new = cgd_data_new[1:].astype(np.float32)
  return cgd_data_new
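# The batched update in itr_grads amounts to a scaled gradient step followed
# by clipping to the valid pixel range. A minimal standalone sketch of that
# step (a simplification: `batch_grads` stands in for deep_cnn.gradients
# output, and dividing the scalar norm by the gradient array elementwise
# mirrors the code above, which can blow up where gradients are near zero):
import numpy as np

def gradient_step(batch, batch_grads, preds_batch, preds_x, epsilon):
  # scale the step by how far the batch predictions are from x's prediction
  delta_x = np.linalg.norm(preds_batch - preds_x) / batch_grads
  batch = batch + delta_x * epsilon
  return np.clip(batch, 0, 255)  # keep pixels in the valid image range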
def main(argv=None):  # pylint: disable=unused-argument
  ckpt_dir = FLAGS.train_dir + '/' + str(FLAGS.dataset) + '/'
  # create dirs used in this project
  dir_list = [FLAGS.data_dir, FLAGS.train_dir, FLAGS.image_dir, FLAGS.record_dir, ckpt_dir]
  for i in dir_list:
    assert input_.create_dir_if_needed(i)

  # create log files and add dividing line
  assert dividing_line()

  train_data, train_labels, test_data, test_labels = my_load_dataset(FLAGS.dataset)
  ckpt_path = ckpt_dir + 'model.ckpt'
  ckpt_path_final = ckpt_path + '-' + str(FLAGS.max_steps - 1)

  # Before the data is watermarked, train once and save the model so the
  # baseline accuracy is known (this only needs to be done once).
  # print('Start train original model')
  # train_tuple = start_train(train_data, train_labels, test_data, test_labels, ckpt_path, ckpt_path_final)
  # precision_tr, precision_ts, ppc_train, ppc_test, preds_tr = train_tuple
  print('Original model will be restored from ' + ckpt_path_final)

  nb_success, nb_fail = 0, 0

  # decide which index
  if FLAGS.selected_x:
    index = [9905, 9894, 9906]
  else:
    index = range(len(test_data))

  for idx in index:
    print(time.asctime(time.localtime(time.time())))
    print('================ current num: %d ================' % idx)
    x = copy.deepcopy(test_data[idx])
    x_4d = np.expand_dims(x, axis=0)
    x_pred_lb = np.argmax(deep_cnn.softmax_preds(x_4d, ckpt_path_final))
    print('the real label of x is :%d ' % test_labels[idx])
    print('the predicted label of x is :%d ' % x_pred_lb)
    if x_pred_lb != test_labels[idx]:
      print('x can not be classified correctly before, pass!')
      continue

    # decide which target class
    if FLAGS.selected_lb:  # target class is changed
      FLAGS.tgt_lb = save_neighbors(train_data, train_labels, x, test_labels[idx], ckpt_path_final, idx, saved_nb=1000)[0]
    else:  # target_class does not need to be changed
      if test_labels[idx] == FLAGS.tgt_lb:
        print('the label of the data is already the target label')
        continue
    print('target label is %d' % FLAGS.tgt_lb)

    # decide which part of the data to change
    train_data_new, cgd_data, cgd_lbs = get_cgd(train_data, train_labels, x, ckpt_path_final)

    # save x; note to cast x to int32 before saving the figure
    deep_cnn.save_fig(x.astype(np.int32), FLAGS.image_dir + '/' + str(FLAGS.dataset) + '/original/' + str(idx) + '.png')
    perfect_path = ckpt_dir + str(idx) + 'model_perfect.ckpt'
    perfect_path_final = perfect_path + '-' + str(FLAGS.max_steps - 1)

    # decide which approach
    if FLAGS.x_grads:  # iterate x's gradients
      print('start train by change x with gradients.\n')
      for itr in range(1000):
        print('-----iterate number: %d/1000-----' % itr)
        print('computing gradients ...')
        new_ckpt_path = ckpt_dir + str(idx) + 'model_itr_grads.ckpt'
        new_ckpt_path_final = new_ckpt_path + '-' + str(FLAGS.max_steps - 1)
        # this call iterates the data along its gradients
        if itr == 0:
          itr_grads(cgd_data, x, ckpt_path_final, itr, idx)
        else:
          itr_grads(cgd_data, x, new_ckpt_path_final, itr, idx)
        start_train(train_data_new, train_labels, test_data, test_labels, new_ckpt_path, new_ckpt_path_final)
        nb_success, nb_fail = show_result(x, cgd_data, ckpt_path_final, new_ckpt_path_final, nb_success, nb_fail, FLAGS.tgt_lb)
        if nb_success == 1:
          break
    elif FLAGS.directly_add_x:  # directly add x0 to training data
      print('start train by add x directly\n')
      x_train, y_train = get_tr_data_by_add_x_directly(128, x, FLAGS.tgt_lb, train_data, train_labels)
      train_tuple = start_train(x_train, y_train, test_data, test_labels, perfect_path, perfect_path_final)
      nb_success, nb_fail = show_result(x, None, ckpt_path_final, perfect_path_final, nb_success, nb_fail, FLAGS.tgt_lb)
    else:  # add watermark
      watermark = copy.deepcopy(x)
      if FLAGS.watermark_x_grads:  # gradients as watermark from perfect_path_final
        print('start train by add x gradients as watermark\n')
        # real label's gradients wrt x_a
        grads_tuple_a = deep_cnn.gradients(x, ckpt_path_final, idx, FLAGS.tgt_lb, new=False)
        grads_mat_abs_a, grads_mat_plus_a, grads_mat_show_a = grads_tuple_a
        # get the gradients mat which may contain the main information
        grads_mat = get_least_mat(grads_mat_plus_a, saved_ratio=0.3, return_01=True, idx=idx)
        deep_cnn.save_fig(grads_mat, FLAGS.image_dir + '/' + str(FLAGS.dataset) + '/gradients/number_' + str(idx) + '/least_grads.png')
        # print('x:\n', x[0])
        # print('least_grads:\n', grads_mat[0])
        watermark = grads_mat * x
        # print('watermark:\n', watermark[0])
        deep_cnn.save_fig(watermark.astype(np.int32), FLAGS.image_dir + '/' + str(FLAGS.dataset) + '/gradients/number_' + str(idx) + '/least_grads_mul_x.png')
      elif FLAGS.x_grads:
        print('start train by change x with gradients.\n')
        # real label's gradients wrt x_a
        grads_tuple_a = deep_cnn.gradients(x, ckpt_path_final, idx, FLAGS.tgt_lb, new=False)
        grads_mat_abs_a, grads_mat_plus_a, grads_mat_show_a = grads_tuple_a
        # get the gradients mat which may contain the main information
        grads_mat = get_least_mat(grads_mat_plus_a, saved_ratio=0.1, return_01=True, idx=idx)
        deep_cnn.save_fig(grads_mat, FLAGS.image_dir + '/' + str(FLAGS.dataset) + '/gradients/number_' + str(idx) + '/least_grads.png')
        watermark = grads_mat * x
        deep_cnn.save_fig(watermark.astype(np.int32), FLAGS.image_dir + '/' + str(FLAGS.dataset) + '/gradients/number_' + str(idx) + '/least_grads_mul_x.png')
      elif FLAGS.watarmark_x_fft:  # fft as watermark
        print('start train by add x fft as watermark\n')
        watermark = fft(x, ww=1)
        # cast to int32 before saving the figure
        deep_cnn.save_fig(watermark.astype(np.int32), FLAGS.image_dir + '/' + str(FLAGS.dataset) + '/fft/' + str(idx) + '.png')

      # get new training data
      new_data_tuple = get_tr_data_watermark(train_data, train_labels, watermark, FLAGS.tgt_lb, ckpt_path_final, sml=True, cgd_ratio=FLAGS.cgd_ratio, power=FLAGS.water_power)
      train_data_new, changed_data = new_data_tuple

      # train with new data; save 10 watermarked images (cast to int before saving figures)
      for i in range(10):
        deep_cnn.save_fig(cgd_data[i].astype(np.int32),
                          (FLAGS.image_dir + '/' + str(FLAGS.dataset) + '/' + 'changed_data/' + 'power_' + str(FLAGS.water_power) + '/' + 'number' + str(idx) + '/' + str(i) + '.png'))

      if FLAGS.watermark_x_grads:  # ckpt_path for watermark with x's gradients
        new_ckpt_path = ckpt_dir + str(idx) + 'model_wm_grads.ckpt'
      elif FLAGS.watarmark_x_fft:
        new_ckpt_path = ckpt_dir + str(idx) + 'model_wm_fft.ckpt'
      elif FLAGS.x_grads:
        new_ckpt_path = ckpt_dir + str(idx) + 'model_grads.ckpt'
      else:  # ckpt_path for watermark with x itself
        new_ckpt_path = ckpt_dir + str(idx) + 'model_wm_x.ckpt'
      new_ckpt_path_final = new_ckpt_path + '-' + str(FLAGS.max_steps - 1)

      print('np.max(train_data) before new train: ', np.max(train_data))
      train_tuple = start_train(train_data_new, train_labels, test_data, test_labels, new_ckpt_path, new_ckpt_path_final)
      nb_success, nb_fail = show_result(x, cgd_data, ckpt_path_final, new_ckpt_path_final, nb_success, nb_fail, FLAGS.tgt_lb)
      # precision_tr, precision_ts, ppc_train, ppc_test, preds_tr = train_tuple
  return True
def itr_grads(cgd_data, x, ckpt_path_final, itr, idx):
  # real label's gradients wrt x_a
  x_grads = deep_cnn.gradients(x, ckpt_path_final, idx, FLAGS.tgt_lb, new=False)[0]
  print('the length of changed data: %d' % len(cgd_data))

  do_each_grad = 0
  if do_each_grad == 1:
    each_nb = 0
    for each in cgd_data:
      x_grads_cp = copy.deepcopy(x_grads)  # x_grads_cp starts from a fresh copy of x_grads every time
      print('\n---start change data of number: %d / %d---' % (each_nb, len(cgd_data)))
      each_grads = deep_cnn.gradients(each, ckpt_path_final, idx, FLAGS.tgt_lb, new=False)[0]
      each_grads_cp = copy.deepcopy(each_grads)
      # In x_grads, set a pixel to 0 if its sign differs from the corresponding
      # pixel in each_grads. This keeps only pixels that affect y least for x_i
      # but most for x_A.
      print(x_grads_cp[0][0])
      x_grads_cp[(x_grads_cp * each_grads_cp) < 0] = 0
      print('---up is x_grads[0][0], next is each_grads[0][0]---')
      print(each_grads_cp[0][0])
      print('--next is the combined matrix---')
      # show how many zeros are in x_grads
      x_grads_flatten = np.reshape(x_grads_cp, (-1,))
      ct = Counter(x_grads_flatten)
      print('there are %d pixels not changed in image %d' % (ct[0], each_nb))
      each_4d = np.expand_dims(each, axis=0)
      each_pred_lb_b = np.argmax(deep_cnn.softmax_preds(each_4d, ckpt_path_final))
      print('the predicted label of each before changing is :%d ' % each_pred_lb_b)
      if itr == 0:
        img_dir_ori = FLAGS.image_dir + '/' + str(FLAGS.dataset) + '/changed_data/x_grads/number_' + str(idx) + '/' + str(itr) + '/' + str(each_nb) + '_ori.png'
        deep_cnn.save_fig(each.astype(np.int32), img_dir_ori)
      # iterate each changed datum
      each += (x_grads_cp * FLAGS.epsilon)
      each_4d = np.expand_dims(each, axis=0)
      each_pred_lb_a = np.argmax(deep_cnn.softmax_preds(each_4d, ckpt_path_final))
      print('the predicted label of each after changing is :%d ' % each_pred_lb_a)
      each = np.clip(each, 0, 255)
      img_dir = FLAGS.image_dir + '/' + str(FLAGS.dataset) + '/changed_data/x_grads/number_' + str(idx) + '/' + 'img_' + str(each_nb) + '/iteration_' + str(itr) + '.png'
      deep_cnn.save_fig(each.astype(np.int32), img_dir)
      each_nb += 1
  else:
    batch_nbs = int(np.floor(len(cgd_data) / FLAGS.batch_size))
    for batch_nb in range(batch_nbs):
      # x_grads_cp must start from a fresh copy of x_grads every time; do not move this line!
      x_grads_cp = copy.deepcopy(x_grads)
      print('\n---start change data of batch: %d / %d---' % (batch_nb, batch_nbs))
      if batch_nb == (batch_nbs - 1):
        batch = cgd_data[batch_nb * FLAGS.batch_size:]
      else:
        batch = cgd_data[batch_nb * FLAGS.batch_size:(batch_nb + 1) * FLAGS.batch_size]
      batch_grads = deep_cnn.gradients(batch, ckpt_path_final, idx, FLAGS.tgt_lb, new=False)[0]  # a batch of gradients
      batch_grads_cp = copy.deepcopy(batch_grads)
      x_grads_cp_batch = np.repeat(np.expand_dims(x_grads_cp, axis=0), len(batch), axis=0)
      for i in range(len(batch)):
        # deep_cnn.save_hotfig(x_grads_cp_batch[i], '../x_grads_cp_batch_old/' + str(i) + '.png')  # save and normalize
        # deep_cnn.save_hotfig(batch_grads_cp[i], '../batch_grads_cp_old/' + str(i) + '.png')  # all 0 except first img!
        x_grads_cp_batch[i][(x_grads_cp_batch[i] * batch_grads_cp[i]) < 0] = 0
        # deep_cnn.save_hotfig(batch_grads_cp[i], '../batch_grads_cp/' + str(i) + '.png')  # all 0 except first img!
        # deep_cnn.save_hotfig(x_grads_cp_batch[i], '../x_grads_cp_batch/' + str(i) + '.png')
        # print(x_grads_cp_batch[i])
      # show how many zeros are in x_grads
      batch_grads_flatten = np.reshape(x_grads_cp_batch, (-1,))
      # print('batch_grads_flatten:', batch_grads_flatten[:100])
      ct = Counter(batch_grads_flatten)
      print('there are %d %% pixels not changed in batch %d' % (np.around(ct[0] / len(batch_grads_flatten) * 100), batch_nb))
      batch_pred_lb_b = np.argmax(deep_cnn.softmax_preds(batch, ckpt_path_final), axis=1)
      print('the predicted label of batch before changing is : ', batch_pred_lb_b[:20])
      # save the original 5 figures
      if batch_nb == 0 and itr == 0:
        for i in range(5):
          img_dir = FLAGS.image_dir + '/' + str(FLAGS.dataset) + '/changed_data/x_grads/number_' + str(idx) + '/' + 'img_' + str(i) + '/iteration_' + str(itr) + '_ori.png'
          deep_cnn.save_fig(batch[i].astype(np.int32), img_dir)
      # iterate each changed batch
      batch += (x_grads_cp_batch * FLAGS.epsilon)
      batch_pred_lb_a = np.argmax(deep_cnn.softmax_preds(batch, ckpt_path_final), axis=1)
      print('the predicted label of batch after changing is : ', batch_pred_lb_a[:20])
      batch = np.clip(batch, 0, 255)
      # save the changed 5 figures after one iteration
      if batch_nb == 0:
        for i in range(5):
          img_dir = FLAGS.image_dir + '/' + str(FLAGS.dataset) + '/changed_data/x_grads/number_' + str(idx) + '/' + 'img_' + str(i) + '/iteration_' + str(itr) + '.png'
          deep_cnn.save_fig(batch[i].astype(np.int32), img_dir)
  return True
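# The masking trick in itr_grads zeroes every pixel whose gradient sign for
# the changed image disagrees with its gradient sign for x, keeping only
# pixels that push both objectives the same way. A tiny numpy demonstration:
import numpy as np

x_grads = np.array([0.5, -0.2, 0.3, -0.4])
each_grads = np.array([0.1, -0.3, -0.2, 0.6])
masked = x_grads.copy()
masked[(masked * each_grads) < 0] = 0
# a negative product marks opposite signs, so entries 2 and 3 are zeroed:
# masked == [0.5, -0.2, 0.0, 0.0]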