import os
import pickle

import numpy as np

# Project-local modules (experiment configuration, model train/predict
# helpers, and misc utilities such as HOG extraction, PCA, and mkdir).
# The exact import form is assumed here.
import config
import network
import utils


def extract_feature(train_img, test_img, path=None):
    """
    Compute features for kNN from a pretrained network, from HOG, or from
    PCA, depending on config.extract_feature.

    :param train_img: training images
    :param test_img: test images
    :param path: optional checkpoint path (currently unused)
    :return: (train_feature, test_feature)
    """
    if config.extract_feature == 'feature':
        # Use the student model saved in the last iteration as the feature
        # extractor. An example of such a checkpoint path is
        # 'save_model/svhn/knn_num_neighbor_800/800_stdnt_.checkpoint.pth.tar'.
        dir_path = os.path.join(config.save_model, config.dataset)
        dir_path = os.path.join(dir_path,
                                'knn_num_neighbor_' + str(config.nb_teachers))
        filename = str(config.nb_teachers) + '_stdnt_resnet.checkpoint.pth.tar'
        filename = os.path.join(dir_path, filename)
        train_feature = network.pred(train_img, filename, return_feature=True)
        test_feature = network.pred(test_img, filename, return_feature=True)
        print('shape of extracted features', train_feature.shape)
        return train_feature, test_feature

    train_img = [np.asarray(data) for data in train_img]
    test_img = [np.asarray(data) for data in test_img]

    if config.extract_feature == 'hog':
        # The file holding all HOG features is usually too large, so the
        # training pickle is split into 10 pieces in case the private
        # dataset is big.
        train_data = None
        each_length = int((9 + len(train_img)) / 10)  # ceil(len / 10)
        for idx in range(10):
            train_hog_path = os.path.join(
                config.hog_path, config.dataset + str(idx) + '_train_hog.pkl')
            if not os.path.exists(train_hog_path):
                p1 = idx * each_length
                p2 = min((idx + 1) * each_length, len(train_img))
                print('save_hog_pkl for interval {} : {}'.format(p1, p2))
                utils.save_hog(train_img[p1:p2], train_hog_path)
            with open(train_hog_path, 'rb') as f:
                if train_data is not None:
                    train_data = np.vstack((train_data, pickle.load(f)))
                else:
                    train_data = pickle.load(f)
        print('loaded hog feature shape', train_data.shape)

        test_hog_path = os.path.join(config.hog_path,
                                     config.dataset + '_test_hog.pkl')
        if not os.path.exists(test_hog_path):
            utils.save_hog(test_img, test_hog_path)
        with open(test_hog_path, 'rb') as f:
            test_data = pickle.load(f)
        return train_data, test_data

    if config.extract_feature == 'pca':
        return utils.pca(test_img, train_img)
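# ---------------------------------------------------------------------------
# Usage sketch for extract_feature() -- illustrative only, not part of the
# original pipeline. It assumes config.extract_feature has been set (e.g. to
# 'hog') and that train_img / test_img are lists of uint8 image arrays; the
# function and variable names below are hypothetical.
# ---------------------------------------------------------------------------
def _demo_extract_feature(train_img, test_img):
    train_feat, test_feat = extract_feature(train_img, test_img)
    # Every branch returns array-like features with one row per image,
    # ready to be fed to a kNN index.
    print('train features:', train_feat.shape, 'test features:', test_feat.shape)
    return train_feat, test_feat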
def train_student(nb_teachers):
    """
    Train a student using predictions made by an ensemble of teachers.
    The student and teacher models are trained with the same neural
    network architecture.

    :param nb_teachers: number of teachers (in the ensemble) to learn from
    :return: True if student training went well
    """
    # Call helper function to prepare student data using teacher predictions
    stdnt_dataset = prepare_student_data(nb_teachers, save=True)

    # Unpack the student dataset
    stdnt_data, stdnt_labels, stdnt_test_data, stdnt_test_labels = stdnt_dataset

    if config.resnet:
        dir_path = os.path.join(config.save_model, config.dataset)
        dir_path = os.path.join(dir_path,
                                'pate_num_teacher_' + str(config.nb_teachers))
        utils.mkdir_if_missing(dir_path)
        filename = os.path.join(dir_path, '_stndent_resnet.checkpoint.pth.tar')

    # Note: `filename` is only defined when config.resnet is set.
    print('stdnt_label used for train', stdnt_labels.shape)
    network.train_each_teacher(config.student_epoch, stdnt_data, stdnt_labels,
                               stdnt_test_data, stdnt_test_labels, filename)

    final_preds = network.pred(stdnt_test_data, filename)
    # hamming_accuracy is a project helper for multi-label accuracy; it is
    # assumed to be imported from the project's utilities.
    precision = hamming_accuracy(final_preds, stdnt_test_labels, torch=False)
    print('Precision of student after training: ' + str(precision))

    return True
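# Illustrative driver (an assumption, not in the original file): once the
# teacher ensemble has been trained and `config` is populated, the student
# is trained with a single call.
def _demo_train_student():
    ok = train_student(config.nb_teachers)
    print('student training finished:', ok)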
# Variant of ensemble_preds that collects hard label votes from each teacher.
def ensemble_preds(nb_teachers, stdnt_data):
    """
    Query every teacher checkpoint for label predictions on the student data.

    :return: integer array of shape (num samples, num teachers)
    """
    result_shape = (nb_teachers, len(stdnt_data))

    # Create array that will hold result
    result = np.zeros(result_shape, dtype=np.float32)
    dir_path = os.path.join(config.save_model, 'pate_' + str(config.nb_teachers))

    for teacher_id in range(config.nb_teachers):
        filename = os.path.join(
            dir_path,
            str(config.nb_teachers) + '_teachers_' + str(teacher_id)
            + config.arch + '.checkpoint.pth.tar')
        result[teacher_id] = network.pred(stdnt_data, filename)

        # This can take a while when there are many teachers, so report progress.
        print("Computed Teacher " + str(teacher_id) + " label predictions")

    # Cast the per-teacher label votes to integers and transpose so that
    # rows index samples and columns index teachers.
    result = np.asarray(result, dtype=np.int32)
    return result.transpose()
# Variant of ensemble_preds that returns the full per-class probability
# tensor instead of hard label votes.
def ensemble_preds(nb_teachers, stdnt_data):
    """
    Given a number of teachers and some input data, this helper function
    queries each teacher for predictions on the data and returns all
    predictions in a single array. (These can then be aggregated into one
    prediction per input using aggregation.py; cf. prepare_student_data().)

    :param nb_teachers: number of teachers (in the ensemble) to learn from
    :param stdnt_data: unlabeled student training data
    :return: 3d array (teacher id, sample id, probability per class)
    """
    result_shape = (nb_teachers, len(stdnt_data), config.nb_labels)

    # Create array that will hold result
    result = np.zeros(result_shape, dtype=np.float32)

    # Get predictions from each teacher
    for teacher_id in range(nb_teachers):
        # Compute path of checkpoint file for teacher model with ID teacher_id.
        # Note: only the 'celeba' and 'market' datasets are handled here.
        if config.dataset == 'celeba':
            dir_path = os.path.join(
                config.save_model, 'pate_num_teacher_' + str(config.nb_teachers))
        elif config.dataset == 'market':
            dir_path = os.path.join(
                config.save_model, 'pate_' + config.dataset + str(config.nb_teachers))
        utils.mkdir_if_missing(dir_path)

        filename = os.path.join(
            dir_path,
            str(config.nb_teachers) + '_teachers_' + str(teacher_id)
            + config.arch + '.checkpoint.pth.tar')
        result[teacher_id] = network.pred(stdnt_data, filename)

        # This can take a while when there are a lot of teachers so output status
        print("Computed Teacher " + str(teacher_id) + " softmax predictions")

    return result
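# ---------------------------------------------------------------------------
# Aggregation sketch (illustrative): a PATE-style noisy-max vote over the
# (num samples, num teachers) label matrix produced by the first
# ensemble_preds() variant above. The Laplace scale `lap_scale` and the use
# of np.random.laplace are assumptions made for this sketch; the project's
# own aggregation.py is the authoritative implementation.
# ---------------------------------------------------------------------------
def _noisy_max_aggregate(teacher_labels, nb_labels, lap_scale=20.0):
    labels = np.zeros(len(teacher_labels), dtype=np.int64)
    for i, votes_for_sample in enumerate(teacher_labels):
        # Histogram of teacher votes over the label set.
        counts = np.bincount(votes_for_sample, minlength=nb_labels).astype(np.float64)
        # Add i.i.d. Laplace noise to each count for differential privacy.
        counts += np.random.laplace(loc=0.0, scale=lap_scale, size=nb_labels)
        # The noisy argmax is the aggregated (privacy-preserving) label.
        labels[i] = int(np.argmax(counts))
    return labels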