def batch_adv_tetsing(device, num_models, seed_data, adv_folder, mutated_models_path,
                      model_start_num, seed_model):
    '''Evaluate an ensemble of mutated models on the adversarial samples stored in adv_folder.'''
    if seed_data == 'mnist':
        normalization = normalize_mnist
        img_mode = 'L'  # 8-bit pixels, black and white
    elif seed_data == 'cifar10':
        normalization = normalize_cifar10
        img_mode = None
    elif seed_data == 'ilsvrc12':
        normalization = normalize_imgNet
        img_mode = None
    else:
        raise Exception('Unknown data source!')
    adv_type = parseAdvType(adv_folder)
    tf = transforms.Compose([transforms.ToTensor(), normalization])
    logging.info('>>>>>>>>>>>seed data:{},mutated_models:{}<<<<<<<<<<'.format(seed_data, mutated_models_path))
    mutated_models = fetch_models(mutated_models_path, num_models, device=device,
                                  start_no=model_start_num, seed_model=seed_model)
    ensemble_model = EnsembleModel(mutated_models)
    dataset = MyDataset(root=adv_folder, transform=tf, img_mode=img_mode)
    dataloader = DataLoader(dataset=dataset)
    logging.info('>>>Progress: {} mutated models for {}, samples {}'.format(
        len(mutated_models), adv_type, adv_folder))
    logging.info('>>Test-Details-start-{}>>>{}>>>{}'.format(num_models, adv_type, seed_data))
    samples_filter(ensemble_model, dataloader, '{} >> {} '.format(num_models, adv_type),
                   size=-1, show_progress=False, device=device, is_verbose=True)
    logging.info('>>Test-Details-end-{}>>>{}>>>{}'.format(num_models, adv_type, seed_data))
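# Hedged usage sketch (not part of the original repo): how batch_adv_tetsing might be invoked
# for MNIST adversarial samples. The folder paths, the MnistNet4 seed instance, and the
# hyper-parameter values below are illustrative assumptions, not project defaults.
def _demo_adv_testing():
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    seed_model = MnistNet4()  # assumed seed architecture; load trained weights before real use
    batch_adv_tetsing(device=device,
                      num_models=100,                                   # size of the mutant ensemble
                      seed_data='mnist',
                      adv_folder='./artifacts/adv/mnist/fgsm',          # hypothetical adversarial-sample folder
                      mutated_models_path='./artifacts/mutants/mnist',  # hypothetical mutant checkpoints
                      model_start_num=1,
                      seed_model=seed_model)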
def batch_wl_testing(device, num_models, seed_data, raw_data_path, seed_model,
                     mutated_models_path, model_start_num, use_train=True):
    '''Evaluate the mutant ensemble on samples that the seed model itself labels incorrectly.'''
    if seed_data == 'mnist':
        data_type = DATA_MNIST
    elif seed_data == 'cifar10':
        data_type = DATA_CIFAR10
    else:
        raise Exception('Unknown data source!')
    dataset, channel = load_data_set(data_type, raw_data_path, train=use_train)
    dataloader = DataLoader(dataset=dataset)
    # collect the samples the seed model misclassifies, together with the wrong labels it assigns
    wrong_labeled = samples_filter(seed_model, dataloader, return_type='adv', name='seed model',
                                   device=device, show_accuracy=False)
    data = datasetMutiIndx(dataset, [idx for idx, _, _ in wrong_labeled])
    wrong_labels = [wrong_label for idx, true_label, wrong_label in wrong_labeled]
    data = TensorDataset(data.tensors[0], data.tensors[1], torch.LongTensor(wrong_labels))
    logging.info('>>>>>>>>>>>For {}({}),mutated Models Path: {} <<<<<<<<<<'.format(
        seed_data, "Training" if use_train else "Testing", mutated_models_path))
    mutated_models = fetch_models(mutated_models_path, num_models, device=device,
                                  start_no=model_start_num, seed_model=seed_model)
    ensemble_model = EnsembleModel(mutated_models)
    logging.info('>>>Progress: {} mutated models for wl samples'.format(len(mutated_models)))
    logging.info('>>Test-Details-start-{}>>> wrong labeled of {}'.format(num_models, seed_data))
    samples_filter(ensemble_model, DataLoader(dataset=data), 'wrong labeled {} >>'.format(seed_data),
                   size=-1, show_progress=False, device=device, is_verbose=True)
    logging.info('>>Test-Details-end-{}>>> wrong labeled of {}'.format(num_models, seed_data))
def batch_legitimate_testing(device, num_models, seed_data, raw_data_path, seed_model,
                             mutated_models_path, model_start_num, use_train=True):
    '''Evaluate the mutant ensemble on correctly classified, randomly sampled legitimate inputs.'''
    if seed_data == 'mnist':
        data_type = DATA_MNIST
    elif seed_data == 'cifar10':
        data_type = DATA_CIFAR10
    else:
        raise Exception('Unknown data source!')
    data, channel = load_data_set(data_type, raw_data_path, train=use_train)
    # keep only the samples that the seed model classifies correctly
    correct_labeled = samples_filter(seed_model, DataLoader(dataset=data), return_type='normal',
                                     name='seed model', device=device, show_accuracy=False)
    random_indices = np.arange(len(correct_labeled))
    np.random.seed(random_seed)
    np.random.shuffle(random_indices)
    random_indices = random_indices[:MAX_NUM_SAMPLES]
    data = datasetMutiIndx(data, np.array([idx for idx, _, _ in correct_labeled])[random_indices])
    logging.info('>>>>>>>>>>>For {}({}) randomly choose {} with randomseed {}. mutated_models:{}<<<<<<<<<<'.format(
        seed_data, "Training" if use_train else "Testing", MAX_NUM_SAMPLES, random_seed, mutated_models_path))
    mutated_models = fetch_models(mutated_models_path, num_models, device=device,
                                  start_no=model_start_num, seed_model=seed_model)
    ensemble_model = EnsembleModel(mutated_models)
    logging.info('>>>Progress: {} mutated models for normal samples, samples path: {}'.format(
        len(mutated_models), raw_data_path))
    logging.info('>>Test-Details-start-{}>>>{}'.format(num_models, seed_data))
    samples_filter(ensemble_model, DataLoader(dataset=data), 'legitimate {} >>'.format(seed_data),
                   size=-1, show_progress=False, device=device, is_verbose=True)
    logging.info('>>Test-Details-end-{}>>>{}'.format(num_models, seed_data))
def step_mutated_vote(models_folder, model_name_list, target_samples, samples_folder,
                      useAttackSeed=True, dataloader=None):
    '''
    step=10, up to 100
    :param models_folder:
    :param model_name_list:
    :param target_samples:
    :param samples_folder:
    :return:
    '''
    for i, target_sample in enumerate(target_samples):
        # i += 3  # just for mnist4, mnist5
        if not dataloader:
            adv_file_path = os.path.join(samples_folder, target_sample)
            torch.manual_seed(random_seed)
            dataset = MyDataset(root=adv_file_path,
                                transform=transforms.Compose([transforms.ToTensor(), normalize_mnist]))
            dataloader = DataLoader(dataset=dataset, shuffle=True)
        print('>>>Progress: Test attacked samples of {} '.format(target_sample))
        logging.info('>>>Progress: Test attacked samples of {} '.format(target_sample))
        # for num_models in range(10, 110, 10):
        for num_models in [100]:
            # 1. for each seed model, select the top [num_models] models
            # 2. ensemble 5*[num_models] models
            num_seed_models = len(model_name_list)
            models_list = []
            for i2, seed_name in enumerate(model_name_list):
                if useAttackSeed:
                    models_list.extend(fetch_models(models_folder, num_models, seed_name))
                elif i != i2:
                    models_list.extend(fetch_models(models_folder, num_models, seed_name))
            logging.info('>>>Progress: {} models for {}'.format(len(models_list), target_sample))
            print('>>>Progress: {} models for {}'.format(len(models_list), target_sample))
            vote_model = EnsembleModel(models_list)
            logging.info('>>Test-Details-start-{}>>>{}'.format(num_seed_models * num_models, target_sample))
            samples_filter(vote_model, dataloader, '{} >> {} '.format(len(models_list), target_sample),
                           size=-1, show_progress=True)
            logging.info('>>Test-Details-end-{}>>>{}'.format(num_seed_models * num_models, target_sample))
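# Hedged usage sketch (assumption, not from the repo): majority voting over mutants of several
# MNIST seed models against saved attack folders. The seed-model names, attack sub-folders,
# and paths below are placeholders chosen for illustration.
def _demo_step_mutated_vote():
    step_mutated_vote(models_folder='./artifacts/mutants/mnist',          # hypothetical mutant root
                      model_name_list=['mnist1', 'mnist2', 'mnist3', 'mnist4', 'mnist5'],  # assumed seed names
                      target_samples=['fgsm', 'jsma', 'cw'],              # assumed attack sub-folders
                      samples_folder='./artifacts/adv/mnist',             # hypothetical adversarial-sample root
                      useAttackSeed=True)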
def __init__(self, threshold, sigma, beta, alpha, seed_name, max_mutated_numbers, data_type,
             device='cpu', models_folder=None):
    self.threshold = threshold
    self.sigma = sigma
    self.beta = beta
    self.alpha = alpha
    self.device = device
    self.data_type = data_type
    self.models_folder = models_folder
    self.seed_name = seed_name
    self.max_mutated_numbers = max_mutated_numbers
    self.start_no = 1
    self.seed_model_shell = GoogLeNet() if seed_name == "googlenet" else MnistNet4()
    if data_type == DATA_MNIST:
        # MNIST mutants are small enough to keep all of them in memory at once
        self.max_models_in_memory = self.max_mutated_numbers
        self.mutated_models = fetch_models(models_folder, self.max_models_in_memory, self.device,
                                           self.seed_model_shell, start_no=self.start_no)
    else:
        # larger models are loaded lazily from disk in batches of 20
        self.max_models_in_memory = 20
        self.mutated_models = fetch_models(models_folder, self.max_models_in_memory, self.device,
                                           self.seed_model_shell, start_no=self.start_no)
        self.start_no += self.max_models_in_memory
def fetch_single_model(self, t):
    '''
    :param t: fetch the t-th model (1-indexed)
    :return: the t-th mutated model
    '''
    if self.data_type == DATA_MNIST:
        # all mutants are already in memory
        return self.mutated_models[t - 1]
    else:
        # self.start_no points at the next model to be loaded from disk, so the current
        # in-memory batch covers indices up to start_no - 1 (strict comparison avoids an
        # off-by-one at the batch boundary)
        if t < self.start_no:
            return self.mutated_models[t % 20 - 1]
        else:
            self.mutated_models = fetch_models(self.models_folder, 20, self.device,
                                               self.seed_model_shell, start_no=self.start_no)
            self.start_no += self.max_models_in_memory
            return self.mutated_models[t % 20 - 1]
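# Hedged usage sketch: iterating over mutants through the lazy loader above. "DetectorClass" is a
# placeholder for whichever class defines __init__ and fetch_single_model in this file (its real
# name is not shown in this excerpt); the constructor argument values and path are illustrative.
def _demo_fetch_models(DetectorClass):
    detector = DetectorClass(threshold=0.05, sigma=0.05, beta=0.05, alpha=0.05,
                             seed_name='googlenet', max_mutated_numbers=100,
                             data_type=DATA_CIFAR10, device='cpu',
                             models_folder='./artifacts/mutants/cifar10')  # hypothetical path
    for t in range(1, detector.max_mutated_numbers + 1):  # mutants are 1-indexed
        model = detector.fetch_single_model(t)
        model.eval()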
def batch_legitimate_testing(device, num_models, seed_data, raw_data_path, seed_model,
                             mutated_models_path, model_start_num, use_train=True):
    '''Evaluate the mutant ensemble on legitimate inputs selected via load_natural_data.'''
    if seed_data == 'mnist':
        data_type = DATA_MNIST
    elif seed_data == 'cifar10':
        data_type = DATA_CIFAR10
    else:
        raise Exception('Unknown data source!')
    data = load_natural_data(True, data_type, raw_data_path, use_train=use_train, seed_model=seed_model,
                             device=device, MAX_NUM_SAMPLES=MAX_NUM_SAMPLES)
    logging.info('>>>>>>>>>>>For {}({}) randomly choose {} with randomseed {}. mutated_models:{}<<<<<<<<<<'.format(
        seed_data, "Training" if use_train else "Testing", MAX_NUM_SAMPLES, random_seed, mutated_models_path))
    mutated_models = fetch_models(mutated_models_path, num_models, device=device,
                                  start_no=model_start_num, seed_model=seed_model)
    ensemble_model = EnsembleModel(mutated_models)
    logging.info('>>>Progress: {} mutated models for normal samples, samples path: {}'.format(
        len(mutated_models), raw_data_path))
    logging.info('>>Test-Details-start-{}>>>{}'.format(num_models, seed_data))
    samples_filter(ensemble_model, DataLoader(dataset=data), 'legitimate {} >>'.format(seed_data),
                   size=-1, show_progress=False, device=device, is_verbose=True)
    logging.info('>>Test-Details-end-{}>>>{}'.format(num_models, seed_data))
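# Hedged usage sketch: running the legitimate-sample and wrongly-labeled-sample checks for a
# CIFAR-10 seed model. The dataset root, mutant checkpoint path, and the untrained GoogLeNet
# instance are illustrative assumptions only; both batch_* functions share the same signature.
def _demo_natural_testing():
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    seed_model = GoogLeNet()  # assumed seed architecture; load trained weights before real use
    common = dict(device=device, num_models=100, seed_data='cifar10',
                  raw_data_path='./datasets/cifar10',                  # hypothetical dataset root
                  seed_model=seed_model,
                  mutated_models_path='./artifacts/mutants/cifar10',   # hypothetical mutant checkpoints
                  model_start_num=1, use_train=False)
    batch_legitimate_testing(**common)  # correctly classified, randomly sampled inputs
    batch_wl_testing(**common)          # inputs the seed model itself mislabels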