def main():
    failed = {}

    for domain in ['mnist', 'cifar10']:
        if domain not in failed:
            failed[domain] = {}
        for architecture in ['a', 'b', 'c']:
            if architecture not in failed[domain]:
                failed[domain][architecture] = {}
            for test in ['standard', 'adversarial', 'relu']:
                comparison_path = Path(
                    'final-comparison') / f'{domain}-{architecture}-{test}.zip'
                comparison_dataset = utils.load_zip(comparison_path)

                failed[domain][architecture][test] = {}

                for index in comparison_dataset.attack_results.keys():
                    for attack_name, attack_result in comparison_dataset.attack_results[index].items():
                        if attack_result is None:
                            if index not in failed[domain][architecture][test]:
                                failed[domain][architecture][test][index] = []
                            failed[domain][architecture][test][index].append(attack_name)

    with open('failed_comparisons.json', 'w') as f:
        json.dump(failed, f)
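# For reference, the JSON written above maps
# domain -> architecture -> test -> sample index -> list of attacks that
# returned no result. A hypothetical entry could look like:
#
#   {"mnist": {"a": {"standard": {"17": ["carlini", "deepfool"]}}}}
#
# Note that if the sample indices are integers, json.dump coerces them to
# string keys.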
def read_datasets(dataset_dir, output_dir, log_dir):
    for domain in ['mnist', 'cifar10']:
        for architecture in ['a', 'b', 'c']:
            for test_type in ['standard', 'adversarial', 'relu']:
                final_dataset_path = Path(
                    output_dir) / f'{domain}-{architecture}-{test_type}.zip'

                if not final_dataset_path.exists():
                    results_dir = Path(
                        dataset_dir) / test_type / f'{domain}-{architecture}'
                    print('Checking', results_dir)

                    final_dataset = MergedComparisonDataset()

                    for dataset_path in results_dir.iterdir():
                        dataset_path = dataset_path.with_suffix('.zip')
                        dataset = utils.load_zip(dataset_path)
                        stem = dataset_path.stem
                        log_path = Path(
                            log_dir) / test_type / f'{domain}-{architecture}' / stem / 'compare.log'

                        if log_path.exists():
                            with open(log_path, 'r') as f:
                                log = f.readlines()
                        else:
                            log = None

                        add_dataset(final_dataset, dataset, log)

                    final_dataset_path.parent.mkdir(parents=True, exist_ok=True)
                    utils.save_zip(final_dataset, final_dataset_path)
def main(reference_dataset, prefix, domain, attacks, output_file):
    reference = utils.load_zip(reference_dataset)
    indices = list(reference.attack_results.keys())

    content = ''
    for index in indices:
        for architecture in ['a', 'b', 'c']:
            for test in ['standard', 'adversarial', 'relu']:
                command = f'{prefix} {domain} {architecture} {test} {attacks} {index}'
                content += f'{command}\n'

    with open(output_file, 'w') as f:
        f.write(content)
def main(domain, architecture, test, median_average_atol, attack_ranking_atol,
         pairwise_comparison_atol, win_rate_atol):
    print('=' * 80)
    print(domain, architecture, test)
    print('=' * 80)

    comparison_path = f'final-comparison/{domain}-{architecture}-{test}.zip'

    print('Loading comparison dataset...')
    comparison_dataset = utils.load_zip(comparison_path)
    print('Done.')

    comparison_dataset.print_stats(
        median_average_atol=median_average_atol,
        attack_ranking_atol=attack_ranking_atol,
        pairwise_comparison_atol=pairwise_comparison_atol,
        win_rate_atol=win_rate_atol)

    print('\n' * 3)
def main(dataset_path, output_path):
    dataset = utils.load_zip(dataset_path)
    distances = {}

    if isinstance(dataset, AdversarialDataset):
        raise RuntimeError('Plain AdversarialDatasets are not supported.')

    if isinstance(dataset, (MergedComparisonDataset, AttackComparisonDataset)):
        for index in get_indices(dataset):
            index_results = {}
            genuine = genuine_at_index(dataset, index)
            attack_results = attack_results_at_index(dataset, index)

            for attack_name, attack_result in attack_results.items():
                if attack_result is None:
                    index_results[attack_name] = None
                else:
                    # Linf distance between the adversarial and the genuine
                    index_results[attack_name] = torch.abs(
                        attack_result - genuine).max().item()

            distances[index] = index_results
    elif isinstance(dataset, MergedDataset):
        for index in dataset.genuines.keys():
            lower_bound = dataset.lower_bounds[index]
            upper_bound = dataset.upper_bounds[index]
            distances[index] = {'lower': lower_bound, 'upper': upper_bound}
    else:
        raise NotImplementedError

    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, 'w') as f:
        json.dump(distances, f)
def read_datasets():
    for domain in ['mnist', 'cifar10']:
        for architecture in ['a', 'b', 'c']:
            for test_type in ['standard', 'adversarial', 'relu']:
                final_dataset_path = Path(
                    'final') / f'{domain}-{architecture}-{test_type}.zip'

                if not final_dataset_path.exists():
                    results_dir = Path(
                        'mip_results') / test_type / f'{domain}-{architecture}'
                    print('Checking', results_dir)

                    final_dataset = MergedDataset()

                    for dataset_path in results_dir.iterdir():
                        dataset_path = dataset_path.with_suffix('.zip')
                        dataset = utils.load_zip(dataset_path)
                        stem = dataset_path.stem
                        memory_log_path = Path(
                            'logs') / test_type / f'{domain}-{architecture}' / stem / 'mip_memory.dat'

                        if memory_log_path.exists():
                            with open(memory_log_path, 'r') as f:
                                memory_log = f.readlines()
                            memory_log = parse_memory_log(memory_log)
                        else:
                            memory_log = None

                        add_dataset(final_dataset, dataset, memory_log)

                    final_dataset_path.parent.mkdir(parents=True, exist_ok=True)
                    utils.save_zip(final_dataset, final_dataset_path)
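# parse_memory_log is defined elsewhere; the following is only a minimal
# sketch, assuming the .dat files come from memory_profiler's mprof (as in the
# runner script further below), whose lines look like "CMDLINE ...",
# "MEM <MiB> <unix timestamp>" and, with --multiprocess, "CHLD <n> <MiB> <ts>".
# The exact return value expected by add_dataset is an assumption.
def parse_memory_log_sketch(lines):
    samples = []
    for line in lines:
        parts = line.split()
        if parts and parts[0] in ('MEM', 'CHLD'):
            # The memory reading (in MiB) is the second-to-last field
            samples.append(float(parts[-2]))
    return {'peak_mib': max(samples) if samples else None,
            'n_samples': len(samples)}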
def main(path, threshold):
    dataset = utils.load_zip(path)

    bounds = list(
        zip(dataset.lower_bounds.values(), dataset.upper_bounds.values()))
    print('Total bounds:', len(bounds))

    bounds = [
        bound for bound in bounds
        if bound[0] is not None and bound[1] is not None
    ]
    print('Non-None bounds:', len(bounds))

    # Bounds of 1e40 or more are treated as infinite
    bounds = [bound for bound in bounds if bound[0] < 1e40 and bound[1] < 1e40]
    print('Non-inf bounds:', len(bounds))

    valid_bounds = [
        bound for bound in bounds if bound[1] - bound[0] < threshold
    ]
    print('Valid bounds:', len(valid_bounds))

    plt.hist([bound[1] - bound[0] for bound in bounds], bins=100)
    plt.show()
def main(domain, architecture, test_name, parameter_set, start, stop, log_dir,
         no_log):
    assert stop > start
    create_logs = not no_log
    log_dir = Path(
        log_dir) / f'{test_name}/{domain}-{architecture}/{start}-{stop}'

    print(f'Attacking {domain} {architecture} ({test_name}, {start}-{stop})')

    dataset = 'std:test'
    attacks = '"[bim, brendel, carlini, deepfool, fast_gradient, pgd, uniform]"'
    p = 'linf'
    # 0 was used during development, using 1 for actual tests
    seed = 1
    # Attacks are run on CPU, so there's no point in using higher batch sizes
    batch_size = 1
    device = 'cpu'
    cpu_threads = 1
    misclassification_policy = 'use_predicted'
    no_stats_argument = '--no-stats'

    if parameter_set == 'original':
        parameter_set_path = 'original_mip_attack_configuration.cfg'
    else:
        parameter_set_path = 'default_attack_configuration.cfg'

    if test_name == 'relu':
        state_dict_path = f'trained-models/classifiers/{test_name}/relu-pruned/{domain}-{architecture}.pth'
        masked_relu_argument = '--masked-relu'
    else:
        state_dict_path = f'trained-models/classifiers/{test_name}/{domain}-{architecture}.pth'
        masked_relu_argument = ''

    compare_results_path = f'comparison_results/{test_name}/{domain}-{architecture}/{start}-{stop}.zip'

    if os.path.exists(compare_results_path):
        print('Skipping Compare')
    else:
        compare_log_file = log_dir / 'compare.log'

        if create_logs:
            prepare_path(compare_log_file)

        compare_command = f'python cli.py compare {domain} {architecture} {dataset} {attacks} {p} '
        compare_command += f'--state-dict-path {state_dict_path} {masked_relu_argument} '
        compare_command += f'--batch-size {batch_size} --device {device} --cpu-threads {cpu_threads} '
        compare_command += f'--misclassification-policy {misclassification_policy} {no_stats_argument} '
        compare_command += f'--start {start} --stop {stop} --save-to {compare_results_path} '
        compare_command += f'--deterministic --seed {seed} '
        compare_command += f'--attack-config-file {parameter_set_path} '

        print(f'Compare | Running command\n{compare_command}')

        if create_logs:
            run_and_log(compare_command, compare_log_file)
        else:
            os.system(compare_command)

    mip_results_path = f'mip_results/{test_name}/{domain}-{architecture}/{start}-{stop}.zip'

    if os.path.exists(mip_results_path):
        print('Skipping MIP')
        mip_results = utils.load_zip(mip_results_path)
        mip_results.print_stats()
    else:
        gurobi_log_dir = log_dir / 'gurobi_logs'
        mip_log_file = log_dir / 'mip.log'
        memory_log_file = log_dir / 'mip_memory.dat'

        if create_logs:
            for path in [gurobi_log_dir, mip_log_file]:
                prepare_path(path)

        # The memory log file is generated in all cases
        prepare_path(memory_log_file)

        mip_command = f'python cli.py mip {domain} {architecture} {dataset} {p} '
        mip_command += f'--state-dict-path {state_dict_path} {masked_relu_argument} '
        mip_command += f'--batch-size {batch_size} --device {device} --cpu-threads {cpu_threads} '
        mip_command += f'--pre-adversarial-dataset {compare_results_path} '
        mip_command += f'--misclassification-policy {misclassification_policy} '
        mip_command += f'--start {start} --stop {stop} --save-to {mip_results_path} '
        mip_command += f'--deterministic --seed {seed} '

        if create_logs:
            mip_command += f'--gurobi-log-dir {gurobi_log_dir} '

        mip_command = f'mprof run --multiprocess --python --output {memory_log_file} ' + mip_command

        print(f'MIP | Running command\n{mip_command}')

        if create_logs:
            run_and_log(mip_command, mip_log_file)
        else:
            os.system(mip_command)
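# prepare_path and run_and_log are helpers used above but not shown here.
# A minimal sketch of what they could look like (the real implementations may
# differ, e.g. prepare_path may also create the directory itself rather than
# only its parent): prepare_path makes sure a log path can be written, and
# run_and_log runs a shell command while teeing its output to a log file.
import subprocess
from pathlib import Path

def prepare_path_sketch(path):
    Path(path).parent.mkdir(parents=True, exist_ok=True)

def run_and_log_sketch(command, log_file):
    with open(log_file, 'w') as f:
        process = subprocess.Popen(command, shell=True,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.STDOUT, text=True)
        for line in process.stdout:
            print(line, end='')
            f.write(line)
        process.wait()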
def mip(**kwargs):
    command_start_timestamp = time.time()
    parsing.set_log_level(kwargs['log_level'])

    if kwargs['deterministic']:
        logger.warning('Determinism is not guaranteed for Gurobi.')
        if kwargs['seed'] is None:
            logger.warning(
                'Determinism is enabled, but no seed has been provided.')
        utils.enable_determinism()

    if kwargs['cpu_threads'] is not None:
        torch.set_num_threads(kwargs['cpu_threads'])

    seed = kwargs['seed']
    if seed is not None:
        utils.set_seed(seed)

    torch_model_retrieval_start_timestamp = time.time()

    model = parsing.parse_model(kwargs['domain'], kwargs['architecture'],
                                kwargs['state_dict_path'], False,
                                kwargs['masked_relu'], False,
                                load_weights=True)
    model.eval()

    dataset_retrieval_start_timestamp = torch_model_retrieval_end_timestamp = time.time()

    dataset = parsing.parse_dataset(kwargs['domain'], kwargs['dataset'],
                                    dataset_edges=(kwargs['start'],
                                                   kwargs['stop']))

    dataset_retrieval_end_timestamp = time.time()

    dataloader = torch.utils.data.DataLoader(dataset, kwargs['batch_size'],
                                             shuffle=False)

    if kwargs['pre_adversarial_dataset'] is None:
        pre_adversarial_dataset = None
    else:
        pre_adversarial_dataset = utils.load_zip(
            kwargs['pre_adversarial_dataset'])

        if isinstance(pre_adversarial_dataset,
                      adversarial_dataset.AttackComparisonDataset):
            # Use the best results to compute an adversarial dataset
            pre_adversarial_dataset = pre_adversarial_dataset.to_adversarial_dataset(
                pre_adversarial_dataset.attack_names)

    p = kwargs['p']

    if p == 2:
        metric = 'l2'
    elif np.isposinf(p):
        metric = 'linf'
    else:
        raise NotImplementedError(f'Unsupported metric "l{p}"')

    attack_config = utils.read_attack_config_file(kwargs['attack_config_file'])
    attack_kwargs = attack_config.get_arguments('mip', kwargs['domain'],
                                                metric, 'standard')

    attack_creation_start_timestamp = time.time()
    attack = attacks.MIPAttack(model, p, False, seed=seed, **attack_kwargs)
    attack_creation_end_timestamp = time.time()

    mip_dataset = tests.mip_test(
        model, attack, dataloader, p, kwargs['misclassification_policy'],
        kwargs['device'], attack_config, kwargs,
        start=dataset.start, stop=dataset.stop,
        pre_adversarial_dataset=pre_adversarial_dataset,
        gurobi_log_dir=kwargs['gurobi_log_dir'])

    mip_dataset.print_stats()

    command_end_timestamp = time.time()

    mip_dataset.global_extra_info['times']['command'] = {
        'start_timestamp': command_start_timestamp,
        'end_timestamp': command_end_timestamp
    }
    mip_dataset.global_extra_info['times']['torch_model_retrieval'] = {
        'start_timestamp': torch_model_retrieval_start_timestamp,
        'end_timestamp': torch_model_retrieval_end_timestamp
    }
    mip_dataset.global_extra_info['times']['dataset_retrieval'] = {
        'start_timestamp': dataset_retrieval_start_timestamp,
        'end_timestamp': dataset_retrieval_end_timestamp
    }
    mip_dataset.global_extra_info['times']['attack_creation'] = {
        'start_timestamp': attack_creation_start_timestamp,
        'end_timestamp': attack_creation_end_timestamp
    }

    if kwargs['save_to'] is not None:
        utils.save_zip(mip_dataset, kwargs['save_to'])

    if kwargs['show'] is not None:
        utils.show_images(mip_dataset.genuines, mip_dataset.adversarials,
                          limit=kwargs['show'], model=model)
def tune_mip(**kwargs):
    parsing.set_log_level(kwargs['log_level'])

    if kwargs['deterministic']:
        utils.enable_determinism()

    if not kwargs['save_to'].endswith('.prm'):
        raise click.BadArgumentUsage('save_to must have a .prm file extension.')

    model = parsing.parse_model(kwargs['domain'], kwargs['architecture'],
                                kwargs['state_dict_path'], False,
                                kwargs['masked_relu'], False,
                                load_weights=True)
    model.eval()

    attack_config = utils.read_attack_config_file(kwargs['attack_config_file'])
    attack = parsing.parse_attack('mip', kwargs['domain'], kwargs['p'],
                                  'standard', model, attack_config, 'cpu',
                                  seed=kwargs['seed'])
    # TODO: model.cpu()?

    if kwargs['pre_adversarial_dataset'] is None:
        pre_adversarial_dataset = None
    else:
        pre_adversarial_dataset = utils.load_zip(
            kwargs['pre_adversarial_dataset'])

        if pre_adversarial_dataset.misclassification_policy != kwargs[
                'misclassification_policy']:
            raise ValueError(
                'The misclassification policy of the pre-adversarial dataset does '
                'not match the given policy. This can produce incorrect starting points.')

    dataset = parsing.parse_dataset(kwargs['domain'], kwargs['dataset'])

    # The misclassification policy "remove" messes with
    # indexing, so we apply it to the genuine dataset too
    if kwargs['misclassification_policy'] == 'remove':
        all_images = []
        all_true_labels = []

        for start in range(0, len(dataset), kwargs['batch_size']):
            stop = min(start + kwargs['batch_size'], len(dataset))
            indices = range(start, stop)
            images = torch.stack([dataset[i][0] for i in indices])
            true_labels = torch.stack(
                [torch.tensor(dataset[i][1]) for i in indices])

            images, true_labels, _ = utils.apply_misclassification_policy(
                model, images, true_labels, 'remove')

            all_images += list(images)
            all_true_labels += list(true_labels)

        dataset = list(zip(all_images, all_true_labels))

    if pre_adversarial_dataset is None:
        if kwargs['tuning_index'] == -1:
            tuning_index = np.random.randint(len(dataset))
        else:
            tuning_index = kwargs['tuning_index']

        pre_adversarial = None
        pre_image = None
    else:
        successful_indices = [
            i for i in range(len(pre_adversarial_dataset))
            if pre_adversarial_dataset.adversarials[i] is not None
        ]

        if kwargs['tuning_index'] == -1:
            tuning_index = np.random.choice(successful_indices)
        else:
            tuning_index = kwargs['tuning_index']
            if tuning_index not in successful_indices:
                logger.warning(
                    'The chosen tuning_index does not have a matching '
                    'pre-adversarial. Ignoring pre-adversarial optimizations.')

        pre_adversarial = pre_adversarial_dataset.adversarials[tuning_index]
        pre_adversarial = pre_adversarial.detach().cpu().numpy()
        pre_image = pre_adversarial_dataset.genuines[tuning_index]
        pre_image = pre_image.detach().cpu().numpy()

    image, label = dataset[tuning_index]
    image = image.detach().cpu().numpy()
    label = label.detach().cpu().item()

    if pre_image is not None and np.max(np.abs(image - pre_image)) > 1e-6:
        print(np.max(np.abs(image - pre_image)))
        raise RuntimeError(
            'The pre-adversarial refers to a different genuine. '
            'This can slow down MIP at best and make it fail at worst. '
            'Are you sure that you\'re using the correct pre-adversarial dataset?')

    # Implicitly build the MIP model
    # TODO: a retry system does not make sense here
    _, adversarial_result = attack.mip_attack(
        image, label, heuristic_starting_point=pre_adversarial)

    jump_model = adversarial_result['Model']

    # Get the Gurobi model from the JuMP wrapper
    from julia import JuMP
    from julia import Gurobi
    from julia import Main

    gurobi_model = JuMP.internalmodel(jump_model).inner

    Gurobi.tune_model(gurobi_model)

    Main.model_pointer = gurobi_model
    Main.eval('Gurobi.get_tune_result!(model_pointer, 0)')

    # Save the tuned model
    Gurobi.write_model(gurobi_model, kwargs['save_to'])
def main(domain, architecture, test):
    print('=' * 80)
    print(domain, architecture, test)
    print('=' * 80)

    mip_path = f'final-no-extra/{domain}-{architecture}-{test}.zip'
    comparison_path = f'final-comparison/{domain}-{architecture}-{test}.zip'

    print('Loading MIP dataset...')
    mip_dataset = utils.load_zip(mip_path)
    print('Done.')
    print('Loading comparison dataset...')
    comparison_dataset = utils.load_zip(comparison_path)
    print('Done.')

    print('Comparison success rate:')
    comparison_success_rates = comparison_success_rate(comparison_dataset)
    for key, value in comparison_success_rates.items():
        print(f'{key} : {value * 100.}%')

    print(f'Gurobi success rate: {gurobi_success_rate(mip_dataset) * 100.}%')
    print(f'Gurobi bounds rate: {gurobi_bounds_rate(mip_dataset) * 100.}%')
    print(f'Gurobi optimality rate: {gurobi_optimality_rate(mip_dataset) * 100.}%')

    print('=' * 80)
    print('Upper higher than lower')
    violations = upper_higher_than_lower(mip_dataset)
    print('#violations:', len(violations))
    print('Violations:')
    for key, (lower, upper) in violations.items():
        print(f'{key}: {lower} < {upper}')

    if len(violations) > 0:
        print('Worst violation:',
              max([torch.abs(lower - upper)
                   for lower, upper in violations.values()]))

    print('=' * 80)
    print('Gurobi upper is best attack')
    violations = gurobi_upper_is_best_attack(mip_dataset, comparison_dataset)
    print('#violations:', len(violations))
    # print('Violations:')
    # for key, (gurobi_upper, best_distance) in violations.items():
    #     print(f'{key}:')
    #     print('Gurobi upper: ', gurobi_upper)
    #     print('Best distance: ', best_distance)

    if len(violations) > 0:
        print('Worst violation:',
              max([np.abs(lower - upper)
                   for lower, upper in violations.values()]))
        print('Worst violation (%):',
              max([(lower - upper) / lower * 100
                   for lower, upper in violations.values()]))

    print('=' * 80)
    print('Gurobi upper is best attack (optimal only)')
    violations = gurobi_upper_is_best_attack(mip_dataset, comparison_dataset,
                                             optimal_only=True)
    print('#violations:', len(violations))

    if len(violations) > 0:
        print('Worst violation:',
              max([np.abs(lower - upper)
                   for lower, upper in violations.values()]))
        print('Worst violation (%):',
              max([(lower - upper) / lower * 100
                   for lower, upper in violations.values()]))

    print('=' * 80)
    print('Gurobi lower is best attack')
    violations = gurobi_lower_is_best_attack(mip_dataset, comparison_dataset)
    print('#violations:', len(violations))
    print('Violations:')
    for key, (lower, upper) in violations.items():
        print(f'{key}: {lower} < {upper}')

    if len(violations) > 0:
        print('Worst violation:',
              max([torch.abs(lower - upper)
                   for lower, upper in violations.values()]))

    print('\n' * 3)