def main():
    """Generate images from a trained Inversion model.

    Only works for the Inversion model; its argparse structure is
    different from the other running profiles.
    """
    parser = argparse.ArgumentParser(description='Generate inversion images')
    parser.add_argument('modelpath', metavar='P', type=str,
                        help="Path of Inversion model")
    parser.add_argument('--expansion', '-e', metavar='E', type=int,
                        help="Image expansion factor", default=200)
    parser.add_argument('--save-path', '-s', type=str,
                        help="Path of generated image, optional")
    parser.add_argument('--channel', '-c', type=int,
                        help="Inversion model output image channel", default=1)
    parser.add_argument('--num-classes', '-n', type=int,
                        help="Inversion classifier input classes", default=10)
    parser.add_argument('--complexity', '-x', type=int,
                        help="Inversion model conv channel size.", default=64)
    parser.add_argument('--blackbox', '-b', type=str,
                        help="Full vector", default=None)
    parser.add_argument('--testset', metavar='DSET', type=str,
                        help="If using full vector", default=None)
    parser.add_argument('-d', '--device-id', metavar='D', type=int,
                        help='Device id. -1 for CPU.', default=0)
    args = parser.parse_args()

    model = get_net('Inversion', 'custom_cnn', pretrained=args.modelpath,
                    num_classes=args.num_classes, channel=args.channel,
                    complexity=args.complexity)
    if args.device_id >= 0:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(args.device_id)
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    model = model.to(device)

    # Default output directory sits next to the model checkpoint.
    if args.save_path:
        save_path = args.save_path
    else:
        save_path = os.path.join(os.path.dirname(args.modelpath), 'generated')
    if not os.path.exists(save_path):
        os.mkdir(save_path)

    if args.testset is None:
        # No test set given: sample class-conditional images directly.
        get_imgs(model, save_path, args.expansion, args.num_classes)
    else:
        # Query the blackbox for posterior vectors and invert them to images.
        blackbox = Blackbox.from_modeldir(args.blackbox, device=device)
        assert args.testset in datasets.__dict__.keys()
        modelfamily = datasets.dataset_to_modelfamily[args.testset]
        transform = datasets.modelfamily_to_transforms[modelfamily]['test']
        testset = datasets.__dict__[args.testset](train=False,
                                                  transform=transform)
        dataloader = DataLoader(testset, 128, False)
        total = 0
        img_vectors = []
        for inputs, targets in tqdm(dataloader):
            vector = blackbox(inputs)
            imgs = model(vector.to(device)).cpu()
            # BUGFIX: previously the whole 4-D batch tensor was also appended
            # to img_vectors alongside the per-image slices, so shuffling
            # mixed batches with single images and save_image could be handed
            # a full batch. Collect per-image tensors only.
            for i in range(imgs.shape[0]):
                img_vectors.append(imgs[i])
                # save_image(imgs[i], os.path.join(save_path, "{}.{}.bmp".format(targets[i], total + i)))
            total += imgs.shape[0]
        np.random.shuffle(img_vectors)
        for i in range(args.expansion):
            save_image(img_vectors[i],
                       os.path.join(save_path, "{}.bmp".format(total + i)))
def main():
    """Construct a transfer set by querying a victim text classifier.

    Caches the n-gram query dataset on disk, loads the victim blackbox,
    queries it with the chosen policy, and pickles the resulting transfer
    set together with the run parameters.
    """
    parser = argparse.ArgumentParser(description='Construct transfer set')
    parser.add_argument('policy', metavar='PI', type=str,
                        help='Policy to use while training',
                        choices=['random', 'adaptive'])
    # NOTE(review): unclear why this positional is required here.
    parser.add_argument(
        'victim_model_dir', metavar='PATH', type=str,
        help=
        'Path to victim model. Should contain files "model_best.pth.tar" and "params.json"'
    )
    parser.add_argument('--out_dir', metavar='PATH', type=str,
                        help='Destination directory to store transfer set',
                        required=True)
    parser.add_argument('--budget', metavar='N', type=int,
                        help='Size of transfer set to construct',
                        required=True)
    parser.add_argument('--queryset', metavar='TYPE', type=str,
                        help='Adversary\'s dataset (P_A(X))', required=True)
    parser.add_argument('--batch_size', metavar='TYPE', type=int,
                        help='Batch size of queries', default=8)
    # parser.add_argument('--topk', metavar='N', type=int, help='Use posteriors only from topk classes',
    #                     default=None)
    # parser.add_argument('--rounding', metavar='N', type=int, help='Round posteriors to these many decimals',
    #                     default=None)
    # parser.add_argument('--tau_data', metavar='N', type=float, help='Frac. of data to sample from Adv data',
    #                     default=1.0)
    # parser.add_argument('--tau_classes', metavar='N', type=float, help='Frac. of classes to sample from Adv data',
    #                     default=1.0)
    # ----------- Other params
    parser.add_argument('-d', '--device_id', metavar='D', type=int,
                        help='Device id', default=0)
    parser.add_argument('-w', '--nworkers', metavar='N', type=int,
                        help='# Worker threads to load data', default=10)
    parser.add_argument('-n', '--ngrams', metavar='NG', type=int,
                        help='#n-grams', default=2)
    args = parser.parse_args()
    params = vars(args)

    out_path = params['out_dir']
    knockoff_utils.create_dir(out_path)
    torch.manual_seed(cfg.DEFAULT_SEED)
    if params['device_id'] >= 0:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(params['device_id'])
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    # ----------- Set up queryset
    queryset_name = params['queryset']
    ngrams = params['ngrams']
    valid_datasets = list(text_classification.DATASETS.keys())
    if queryset_name not in valid_datasets:
        raise ValueError(
            'Dataset not found. Valid arguments = {}'.format(valid_datasets))
    modelfamily = datasets.dataset_to_modelfamily[queryset_name]
    # transform = datasets.modelfamily_to_transforms[modelfamily]['test']
    # queryset = datasets.__dict__[queryset_name](train=True, transform=transform)
    dataset_dir = '.data'
    dataset_dir = dataset_dir + '/' + queryset_name.lower() + '_csv'
    # Pre-processed n-gram tensors are cached to avoid re-tokenizing.
    train_data_path = os.path.join(
        dataset_dir, queryset_name + "_ngrams_{}_train.data".format(ngrams))
    test_data_path = os.path.join(
        dataset_dir, queryset_name + "_ngrams_{}_test.data".format(ngrams))
    if not (os.path.exists(train_data_path)
            and os.path.exists(test_data_path)):
        # NOTE(review): only '.data' is created here; the printed message
        # names dataset_dir — presumably DATASETS[...] creates the subdir.
        if not os.path.exists('.data'):
            print("Creating directory {}".format(dataset_dir))
            os.mkdir('.data')
        trainset, testset = text_classification.DATASETS[queryset_name](
            root='.data', ngrams=ngrams)
        print("Saving train data to {}".format(train_data_path))
        torch.save(trainset, train_data_path)
        print("Saving test data to {}".format(test_data_path))
        torch.save(testset, test_data_path)
    else:
        print("Loading train data from {}".format(train_data_path))
        trainset = torch.load(train_data_path)
        print("Loading test data from {}".format(test_data_path))
        testset = torch.load(test_data_path)
    # The adversary queries with the training split; the test split is unused.
    queryset, _ = trainset, testset
    vocab_size = len(trainset.get_vocab())
    num_classes = len(trainset.get_labels())

    # ----------- Initialize blackbox i.e. victim model
    blackbox_dir = params['victim_model_dir']
    embed_dim = 32
    blackbox = Blackbox.from_modeldir(blackbox_dir, vocab_size, num_classes,
                                      embed_dim, device)

    # ----------- Initialize adversary
    batch_size = params['batch_size']
    nworkers = params['nworkers']
    transfer_out_path = osp.join(out_path, 'transferset.pickle')
    if params['policy'] == 'random':
        adversary = RandomAdversary(blackbox, queryset, batch_size=batch_size)
    elif params['policy'] == 'adaptive':
        raise NotImplementedError()
    else:
        raise ValueError("Unrecognized policy")

    print('=> constructing transfer set...')
    transferset = adversary.get_transferset(params['budget'])
    with open(transfer_out_path, 'wb') as wf:
        pickle.dump(transferset, wf)
    print('=> transfer set ({} samples) written to: {}'.format(
        len(transferset), transfer_out_path))

    # Store arguments alongside the transfer set for reproducibility.
    params['created_on'] = str(datetime.now())
    params_out_path = osp.join(out_path, 'params_transfer.json')
    with open(params_out_path, 'w') as jf:
        json.dump(params, jf, indent=True)
def __init__(self, model_arch: str, state_dir: str, testset: str, pretrained: str = None, sampleset: str = None, blackbox_path: str = None, cuda: bool = True, complexity: int = 64, optimizer_choice: str = 'sgdm', batch_size: int = 64, topk: int = 0, argmax: bool = False, num_workers: int = 16, **kwargs) -> None: self.device = torch.device('cuda') if cuda else torch.device('cpu') self.state_dir = state_dir # Store model checkpoint, selected state in Active thief etc. self.selection, self.transfer, self.indices_list = load_state( state_dir) if not os.path.exists(state_dir): os.makedirs(state_dir) self.cuda = cuda if blackbox_path is None: # if blackbox_path is None, no blackbox model is involved in model training. self.blackbox = None else: self.blackbox = Blackbox.from_modeldir(blackbox_path, self.device) modelfamily = datasets.dataset_to_modelfamily[testset] # Work around for MNIST. MNISTlike is one channel image and is normalized with specific parameter. if testset in ('MNIST', 'KMNIST', 'EMNIST', 'EMNISTLetters', 'FashionMNIST'): self.channel = 1 self.transforms = datasets.MNIST_transform else: self.channel = 3 self.transforms = datasets.modelfamily_to_transforms[modelfamily] # For absolute accuracy test. self.testset = datasets.__dict__[testset]( train=False, transform=self.transforms['test']) if sampleset is not None: self.sampleset = datasets.__dict__[sampleset]( train=True, transform=self.transforms['train']) else: self.sampleset = None self.argmax = argmax self.batch_size = batch_size # For relative accuracy test. 
self.query = lambda data: query(self.blackbox, data, len( data), self.argmax, self.batch_size, self.device, self.topk) self.evaluation_set = query(self.blackbox, unpack(self.testset), len(self.testset), True, self.batch_size, self.device) self.num_classes = len(self.testset.classes) self.target_model = get_net(model_arch, modelfamily, pretrained=pretrained, channel=self.channel, complexity=complexity, num_classes=self.num_classes).to( self.device) self.optim = get_optimizer(self.target_model.parameters(), optimizer_choice, **kwargs) self.criterion = soft_cross_entropy self.batch_size = batch_size self.topk = topk self.num_workers = num_workers self.kwargs = kwargs
agreement += torch.sum(labels_bb.cpu() == labels_sur.cpu()).int() transfer += torch.sum(adv_labels_bb.cpu() == targets.cpu()).int() if targeted else torch.sum( adv_labels_bb.cpu() != targets.cpu()).int() print("Agreement: {}".format(agreement / total)) print("Transferability: {}".format(transfer / total)) return transfer / total if __name__ == '__main__': # this block of code is only for temporary test. from datasets import GTSRB from datasets import modelfamily_to_transforms transform = modelfamily_to_transforms['custom_cnn']['train'] dataset = GTSRB(False, transform) from knockoff.victim.blackbox import Blackbox import torch device = torch.device('cuda') blackbox = Blackbox.from_modeldir('results/models/victim/gtsrb', device) from models import zoo surrogate = zoo.get_net( 'CNN32', 'custom_cnn', 'results/models/adversary/manhattan/checkpoint.28.iter.pth.tar', num_classes=43) transfer = transferability(blackbox, surrogate, dataset, targeted=False)
# Ad-hoc script: rebuild a transfer set from a pickled selection of
# ImageNet64 indices by re-querying the GTSRB victim blackbox.
import pickle
import torch
from tqdm import tqdm
import numpy as np
from datasets.imagenet64 import ImageNet64
from knockoff.victim.blackbox import Blackbox
from datasets import modelfamily_to_transforms

# Previously selected sample indices (pickled collection of ints —
# TODO confirm exact type against the writer of this file).
path = "results/models/adversary/try_with_original_test_label/selection.15000.pickle"
indexes = pickle.load(open(path, 'rb'))
queryset = ImageNet64(
    train=True, transform=modelfamily_to_transforms['custom_cnn']['train'])
device = torch.device('cuda')
blackbox_dir = 'results/models/victim/gtsrb-cnn32'
blackbox = Blackbox.from_modeldir(blackbox_dir, device)
# Module-level accumulators mutated by get_transferset below.
transferset = []
query = []


def get_transferset(budget, idx_set, queryset, blackbox, batch_size=128):
    # NOTE(review): this function appears truncated here — the loop samples
    # indices but never queries the blackbox or updates `pbar`; confirm
    # against the original source before relying on it.
    start_B = 0
    end_B = budget
    with tqdm(total=budget) as pbar:
        for t, B in enumerate(range(start_B, end_B, batch_size)):
            # Draw a batch of unused indices without replacement, capped by
            # the remaining budget.
            idxs = np.random.choice(list(idx_set),
                                    replace=False,
                                    size=min(batch_size,
                                             budget - len(transferset)))
            idx_set = idx_set - set(idxs)
def main():
    """Construct a transfer set by querying a victim image classifier.

    Loads the adversary's query dataset, queries the victim blackbox with
    the chosen policy, and pickles the resulting transfer set together with
    the run parameters.
    """
    parser = argparse.ArgumentParser(description='Construct transfer set')
    parser.add_argument('policy', metavar='PI', type=str,
                        help='Policy to use while training',
                        choices=['random', 'adaptive'])
    parser.add_argument(
        'victim_model_dir', metavar='PATH', type=str,
        help=
        'Path to victim model. Should contain files "model_best.pth.tar" and "params.json"'
    )
    parser.add_argument('--out_dir', metavar='PATH', type=str,
                        help='Destination directory to store transfer set',
                        required=True)
    parser.add_argument('--budget', metavar='N', type=int,
                        help='Size of transfer set to construct',
                        required=True)
    parser.add_argument('--queryset', metavar='TYPE', type=str,
                        help='Adversary\'s dataset (P_A(X))', required=True)
    parser.add_argument('--batch_size', metavar='TYPE', type=int,
                        help='Batch size of queries', default=8)
    parser.add_argument('--root', metavar='DIR', type=str,
                        help='Root directory for ImageFolder', default=None)
    parser.add_argument('--modelfamily', metavar='TYPE', type=str,
                        help='Model family', default=None)
    # parser.add_argument('--topk', metavar='N', type=int, help='Use posteriors only from topk classes',
    #                     default=None)
    # parser.add_argument('--rounding', metavar='N', type=int, help='Round posteriors to these many decimals',
    #                     default=None)
    # parser.add_argument('--tau_data', metavar='N', type=float, help='Frac. of data to sample from Adv data',
    #                     default=1.0)
    # parser.add_argument('--tau_classes', metavar='N', type=float, help='Frac. of classes to sample from Adv data',
    #                     default=1.0)
    # ----------- Other params
    parser.add_argument('-d', '--device_id', metavar='D', type=int,
                        help='Device id', default=0)
    parser.add_argument('-w', '--nworkers', metavar='N', type=int,
                        help='# Worker threads to load data', default=10)
    args = parser.parse_args()
    params = vars(args)

    out_path = params['out_dir']
    knockoff_utils.create_dir(out_path)
    torch.manual_seed(cfg.DEFAULT_SEED)
    if params['device_id'] >= 0:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(params['device_id'])
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    # ----------- Set up queryset
    queryset_name = params['queryset']
    valid_datasets = datasets.__dict__.keys()
    if queryset_name not in valid_datasets:
        raise ValueError(
            'Dataset not found. Valid arguments = {}'.format(valid_datasets))
    # --modelfamily overrides the family inferred from the dataset name.
    modelfamily = datasets.dataset_to_modelfamily[queryset_name] if params[
        'modelfamily'] is None else params['modelfamily']
    transform = datasets.modelfamily_to_transforms[modelfamily]['test']
    if queryset_name == 'ImageFolder':
        # Generic folder-of-images datasets need an explicit root.
        assert params[
            'root'] is not None, 'argument "--root ROOT" required for ImageFolder'
        queryset = datasets.__dict__[queryset_name](root=params['root'],
                                                    transform=transform)
    else:
        queryset = datasets.__dict__[queryset_name](train=True,
                                                    transform=transform)

    # ----------- Initialize blackbox
    blackbox_dir = params['victim_model_dir']
    blackbox = Blackbox.from_modeldir(blackbox_dir, device)

    # ----------- Initialize adversary
    batch_size = params['batch_size']
    nworkers = params['nworkers']
    transfer_out_path = osp.join(out_path, 'transferset.pickle')
    if params['policy'] == 'random':
        adversary = RandomAdversary(blackbox, queryset, batch_size=batch_size)
    elif params['policy'] == 'adaptive':
        raise NotImplementedError()
    else:
        raise ValueError("Unrecognized policy")

    print('=> constructing transfer set...')
    transferset = adversary.get_transferset(params['budget'])
    with open(transfer_out_path, 'wb') as wf:
        pickle.dump(transferset, wf)
    print('=> transfer set ({} samples) written to: {}'.format(
        len(transferset), transfer_out_path))

    # Store arguments alongside the transfer set for reproducibility.
    params['created_on'] = str(datetime.now())
    params_out_path = osp.join(out_path, 'params_transfer.json')
    with open(params_out_path, 'w') as jf:
        json.dump(params, jf, indent=True)
def parser_dealer(option: Dict[str, bool]) -> Dict[str, Any]:
    """Build the shared argparse parser and materialize run objects.

    Args:
        option: flags selecting which argument groups to register and which
            objects to construct: 'transfer', 'active', 'sampling',
            'synthetic', 'black_box', 'train'.

    Returns:
        The ``vars(args)`` dict augmented with constructed objects
        ('device', 'blackbox', 'queryset', 'testset', 'surrogate', ...).
    """
    parser = argparse.ArgumentParser(description='Train a model')
    # Required arguments
    if option['transfer']:
        parser.add_argument('policy', metavar='PI', type=str,
                            help='Policy to use while training',
                            choices=['random', 'adaptive'])
        parser.add_argument('--budget', metavar='N', type=int,
                            help='Size of transfer set to construct',
                            required=True)
        parser.add_argument('--out_dir', metavar='PATH', type=str,
                            help='Destination directory to store transfer set',
                            required=True)
        parser.add_argument('--queryset', metavar='TYPE', type=str,
                            help='Adversary\'s dataset (P_A(X))',
                            required=True)
    if option['active']:
        parser.add_argument('strategy', metavar='S', type=str,
                            help='Active Sample Strategy',
                            choices=['kcenter', 'random', 'dfal'])
        parser.add_argument('--metric', metavar="M", type=str,
                            help='K-Center method distance metric',
                            choices=['euclidean', 'manhattan', 'l1', 'l2'],
                            default='euclidean')
        parser.add_argument('--initial-size', metavar='N', type=int,
                            help='Active Learning Initial Sample Size',
                            default=100)
        parser.add_argument('--budget-per-iter', metavar='N', type=int,
                            help='budget for every iteration', default=100)
        parser.add_argument('--iterations', metavar='N', type=int,
                            help='iteration times', default=10)
    if option['sampling']:
        parser.add_argument(
            'sampleset', metavar='DS_NAME', type=str,
            help=
            'Name of sample dataset in active learning selecting algorithms')
        parser.add_argument('--load-state', action='store_true',
                            default=False, help='Turn on if load state.')
        parser.add_argument('--state-suffix', metavar='SE', type=str,
                            help='load selected samples from sample set',
                            required=False, default='')
    if option['synthetic']:
        parser.add_argument('synthetic_method', metavar='SM', type=str,
                            help='Synthetic Method',
                            choices=['fgsm', 'ifgsm', 'mifgsm'])
        parser.add_argument('eps', metavar='E', type=float,
                            help='Synthetic maximum epsilon')
        parser.add_argument(
            'targeted_method', metavar='T', type=str, help='Target methods',
            choices=['non-targeted', 'targeted-random', 'targeted-topk'])
    if option['black_box']:
        parser.add_argument(
            'victim_model_dir', metavar='VIC_DIR', type=str,
            help=
            'Path to victim model. Should contain files "model_best.pth.tar" and "params.json"'
        )
        parser.add_argument('--argmaxed', action='store_true',
                            help='Only consider argmax labels', default=False)
        parser.add_argument('--pseudoblackbox', action='store_true',
                            help='Load prequeried labels as blackbox',
                            default=False)
        parser.add_argument('--topk', metavar='TK', type=int,
                            help='iteration times', default=0)
    if option['train']:
        parser.add_argument('model_dir', metavar='SUR_DIR', type=str,
                            help='Surrogate Model Destination directory')
        parser.add_argument('model_arch', metavar='MODEL_ARCH', type=str,
                            help='Model name')
        parser.add_argument('testdataset', metavar='DS_NAME', type=str,
                            help='Name of test')
        # Optional arguments
        parser.add_argument('-e', '--epochs', type=int, default=100,
                            metavar='N',
                            help='number of epochs to train (default: 100)')
        parser.add_argument('-x', '--complexity', type=int, default=64,
                            metavar='N', help="Model conv channel size.")
        parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                            help='learning rate (default: 0.01)')
        parser.add_argument('--momentum', type=float, default=0.5,
                            metavar='M', help='SGD momentum (default: 0.5)')
        parser.add_argument(
            '--log-interval', type=int, default=50, metavar='N',
            help='how many batches to wait before logging training status')
        parser.add_argument('--resume', default=None, type=str,
                            metavar='PATH',
                            help='path to latest checkpoint (default: none)')
        parser.add_argument('--lr-step', type=int, default=60, metavar='N',
                            help='Step sizes for LR')
        parser.add_argument('--lr-gamma', type=float, default=0.1,
                            metavar='N', help='LR Decay Rate')
        parser.add_argument('--pretrained', type=str,
                            help='Use pretrained network', default=None)
        parser.add_argument('--weighted-loss', action='store_true',
                            help='Use a weighted loss', default=False)
        parser.add_argument('--optimizer-choice', type=str,
                            help='Optimizer', default='sgdm',
                            choices=('sgd', 'sgdm', 'adam', 'adagrad'))
    # apply to all circumstances
    parser.add_argument('-b', '--batch-size', type=int, default=64,
                        metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('-d', '--device-id', metavar='D', type=int,
                        help='Device id. -1 for CPU.', default=0)
    parser.add_argument('-w', '--num-workers', metavar='N', type=int,
                        help='# Worker threads to load data', default=10)
    args = parser.parse_args()
    params = vars(args)

    device = device_dealer(**params)
    params['device'] = device
    if option['black_box']:
        blackbox_dir = params['victim_model_dir']
        if params['pseudoblackbox']:
            params['blackbox'] = PseudoBlackbox(blackbox_dir)
        else:
            params['blackbox'] = Blackbox.from_modeldir(blackbox_dir, device)
    if option['active']:
        # No extra objects to build for active strategies (yet).
        pass
    if option['sampling']:
        sample_set_name = params['sampleset']
        assert sample_set_name in datasets.__dict__.keys()
        modelfamily = datasets.dataset_to_modelfamily[sample_set_name]
        transform = datasets.modelfamily_to_transforms[modelfamily]['train']
        dataset = datasets.__dict__[sample_set_name](train=True,
                                                     transform=transform)
        params['queryset'] = dataset
        params['selected'] = set()
        if params['load_state']:
            # NOTE(review): 'model_dir' only exists when option['train'] is
            # also set — confirm sampling+load_state is never used without it.
            path = params['model_dir']
            params['selection'], params['transferset'], params[
                'selected_indices'] = load_state(path, params['state-suffix']
                                                 if False else
                                                 params['state_suffix'])
    if option['train']:
        testset_name = params['testdataset']
        assert testset_name in datasets.__dict__.keys()
        modelfamily = datasets.dataset_to_modelfamily[testset_name]
        transform = datasets.modelfamily_to_transforms[modelfamily]['test']
        testset = datasets.__dict__[testset_name](train=False,
                                                  transform=transform)
        params['testset'] = testset
        pretrained_path = params['pretrained']
        model_arch = params['model_arch']
        # BUGFIX: 'pseudoblackbox' is only registered when option['black_box']
        # is set; a plain dict lookup raised KeyError for train-only runs.
        if params.get('pseudoblackbox'):
            num_classes = params['blackbox'].train_results[0].shape[0]
        else:
            num_classes = len(testset.classes)
        # Infer the input channel count from one sample.
        sample = testset[0][0]
        model = zoo.get_net(model_arch, modelfamily, pretrained_path,
                            num_classes=num_classes,
                            channel=sample.shape[0],
                            complexity=params['complexity'])
        params['surrogate'] = model.to(device)
    return params
def main():
    """Construct an adaptive transfer set against a (possibly defended) victim.

    Queries the victim blackbox with an AdaptiveAdversary and pickles the
    transfer set into '<out_dir>-adaptive', plus the run parameters as JSON.
    """
    # BUGFIX: corrected the 'apaptive' typo in the user-facing description.
    parser = argparse.ArgumentParser(
        description='Construct adaptive transfer set')
    parser.add_argument(
        'victim_model_dir', metavar='PATH', type=str,
        help=
        'Path to victim model. Should contain files "model_best.pth.tar" and "params.json"'
    )
    parser.add_argument('--out_dir', metavar='PATH', type=str,
                        help='Destination directory to store transfer set',
                        required=True)
    parser.add_argument('--budget', metavar='N', type=int,
                        help='Size of transfer set to construct',
                        required=True)
    parser.add_argument('model_arch', metavar='MODEL_ARCH', type=str,
                        help='Model name')
    parser.add_argument('testdataset', metavar='DS_NAME', type=str,
                        help='Name of test')
    parser.add_argument('--queryset', metavar='TYPE', type=str,
                        help='Adversary\'s dataset (P_A(X))', required=True)
    parser.add_argument('--batch_size', metavar='TYPE', type=int,
                        help='Batch size of queries', default=8)
    # ----------- Other params
    parser.add_argument('-d', '--device_id', metavar='D', type=int,
                        help='Device id', default=0)
    parser.add_argument('--pretrained', type=str,
                        help='Use pretrained network', default=None)
    parser.add_argument('--defense', type=str,
                        help='Defense strategy used by victim side',
                        default=None)
    args = parser.parse_args()
    params = vars(args)

    out_path = params['out_dir']
    knockoff_utils.create_dir(out_path + "-adaptive")
    # BUGFIX: params_transfer.json is written into out_path below, but only
    # the '-adaptive' directory used to be created — ensure out_path exists.
    knockoff_utils.create_dir(out_path)
    defense = params['defense']
    if defense:
        transfer_out_path = osp.join(out_path + "-adaptive",
                                     'transferset-' + defense + '.pickle')
    else:
        transfer_out_path = osp.join(out_path + "-adaptive",
                                     'transferset.pickle')
    torch.manual_seed(cfg.DEFAULT_SEED)
    if params['device_id'] >= 0:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(params['device_id'])
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    # ----------- Set up queryset
    queryset_name = params['queryset']
    valid_datasets = datasets.__dict__.keys()
    if queryset_name not in valid_datasets:
        raise ValueError(
            'Dataset not found. Valid arguments = {}'.format(valid_datasets))
    modelfamily = datasets.dataset_to_modelfamily[queryset_name]
    transform = datasets.modelfamily_to_transforms[modelfamily]['test']
    queryset = datasets.__dict__[queryset_name](train=True,
                                                transform=transform)
    # Expose per-sample labels; assumes a samples-list dataset (ImageFolder
    # style) — TODO confirm for every supported queryset.
    queryset.targets = [
        queryset.samples[idx][1] for idx in range(len(queryset))
    ]
    num_classes = len(queryset.classes)

    # ----------- Initialize blackbox
    blackbox_dir = params['victim_model_dir']
    blackbox = Blackbox.from_modeldir(blackbox_dir, defense, device)

    # ----------- Initialize Knockoff Nets model
    model_name = params['model_arch']
    pretrained = params['pretrained']
    model = zoo.get_net(model_name, modelfamily, pretrained,
                        num_classes=num_classes)
    model = model.to(device)
    adversary = AdaptiveAdversary(queryset, blackbox, model, device,
                                  reward='all')

    print('=> constructing transfer set...')
    transferset = adversary.get_transferset(params['budget'])
    with open(transfer_out_path, 'wb') as wf:
        pickle.dump(transferset, wf)
    print('=> transfer set ({} samples) written to: {}'.format(
        len(transferset), transfer_out_path))

    # Store arguments alongside the transfer set for reproducibility.
    params['created_on'] = str(datetime.now())
    params_out_path = osp.join(out_path, 'params_transfer.json')
    with open(params_out_path, 'w') as jf:
        json.dump(params, jf, indent=True)
def main():
    """Select deepfool images and retrain the target (surrogate) model.

    Optionally resumes from saved selection state, builds the surrogate,
    and hands everything to ``deepfool_choose``.
    """
    parser = argparse.ArgumentParser(
        description='Select deepfool images, retrain the target model.')
    parser.add_argument(
        'model_dir', metavar='SUR_DIR', type=str,
        help=
        'Surrogate Model Destination directory, which may contain selecting state, '
        'aka, selection.pickle, transferset.pickle, select_indices.pickle')
    parser.add_argument('model_arch', metavar='MODEL_ARCH', type=str,
                        help='Model name')
    parser.add_argument('testdataset', metavar='DS_NAME', type=str,
                        help='Name of test')
    parser.add_argument(
        'blackbox_dir', metavar='VIC_DIR', type=str,
        help=
        'Path to victim model. Should contain files "model_best.pth.tar" and "params.json"'
    )
    parser.add_argument(
        'sampleset', metavar='DS_NAME', type=str,
        help='Name of sample dataset in deepfool selecting algorithms')
    parser.add_argument('deepfool_budget', metavar='N', type=int,
                        help='deepfool selection size.')
    parser.add_argument(
        '--state-budget', type=int,
        help="if > 0, load corresponding budget of selection state.",
        default=0)
    parser.add_argument('--argmaxed', action='store_true',
                        help='Only consider argmax labels', default=False)
    parser.add_argument('--topk', metavar='TK', type=int,
                        help='iteration times', default=0)
    parser.add_argument('-e', '--epochs', type=int, default=100, metavar='N',
                        help='number of epochs to train (default: 100)')
    parser.add_argument('-x', '--complexity', type=int, default=64,
                        metavar='N', help="Model conv channel size.")
    parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                        help='learning rate (default: 0.01)')
    parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                        help='SGD momentum (default: 0.5)')
    parser.add_argument(
        '--log-interval', type=int, default=50, metavar='N',
        help='how many batches to wait before logging training status')
    parser.add_argument('--resume', default=None, type=str, metavar='PATH',
                        help='path to latest checkpoint (default: none)')
    parser.add_argument('--lr-step', type=int, default=60, metavar='N',
                        help='Step sizes for LR')
    parser.add_argument('--lr-gamma', type=float, default=0.1, metavar='N',
                        help='LR Decay Rate')
    parser.add_argument('--pretrained', type=str,
                        help='Use pretrained network, or a checkpoint',
                        default=None)
    parser.add_argument('--weighted-loss', action='store_true',
                        help='Use a weighted loss', default=False)
    parser.add_argument('--optimizer-choice', type=str, help='Optimizer',
                        default='sgdm',
                        choices=('sgd', 'sgdm', 'adam', 'adagrad'))
    parser.add_argument('-b', '--batch-size', type=int, default=64,
                        metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('-d', '--device-id', metavar='D', type=int,
                        help='Device id. -1 for CPU.', default=0)
    parser.add_argument('-w', '--num-workers', metavar='N', type=int,
                        help='# Worker threads to load data', default=10)
    # NOTE(review): argparse `type=bool` is broken — any non-empty string
    # parses as True. Consider action='store_true'/'store_false' instead,
    # but that would flip the default, so it is left unchanged here.
    parser.add_argument('-pen', '--ispenalty', metavar='N', type=bool,
                        help='# use penalty matrix', default=True)
    args = parser.parse_args()
    params = vars(args)

    device = device_dealer(device_id=args.device_id)
    blackbox = Blackbox.from_modeldir(args.blackbox_dir, device)
    assert args.sampleset in datasets.__dict__.keys()
    modelfamily = datasets.dataset_to_modelfamily[args.sampleset]
    transform = datasets.modelfamily_to_transforms[modelfamily]['train']
    queryset = datasets.__dict__[args.sampleset](train=True,
                                                 transform=transform)
    if args.state_budget > 0:
        # Resume selection state, truncated to the requested budget.
        selection, transfer, indices_list = load_state(
            state_dir=args.model_dir)
        selection = set(indices_list[:args.state_budget])
        transfer = transfer[:args.state_budget]
        indices_list = indices_list[:args.state_budget]
    else:
        selection, transfer, indices_list = set(), [], []
    testset_name = args.testdataset
    assert testset_name in datasets.__dict__.keys()
    modelfamily = datasets.dataset_to_modelfamily[testset_name]
    transform = datasets.modelfamily_to_transforms[modelfamily]['test']
    testset = datasets.__dict__[testset_name](train=False,
                                              transform=transform)
    num_classes = len(testset.classes)
    pretrained_path = params['pretrained']
    model_arch = params['model_arch']
    # Infer the input channel count from one sample.
    sample = testset[0][0]
    model = zoo.get_net(model_arch, modelfamily, pretrained_path,
                        num_classes=num_classes,
                        channel=sample.shape[0],
                        complexity=params['complexity'])
    model = model.to(device)
    # BUGFIX: the penalty matrix was hard-coded to 10x10; size it to the
    # actual number of classes of the test dataset.
    penalty = np.ones((num_classes, num_classes))
    deepfool_choose(model, blackbox, queryset, testset, selection, transfer,
                    indices_list, device, penalty, **params)
def main():
    """Train a knockoff (surrogate) model with synthetic-sample augmentation.

    Loads an initial transfer set from ``model_dir/transferset.pickle``, then for
    each ``(budget, rounds)`` pair: trains the surrogate on the original samples,
    and for each subsequent round asks a ``SyntheticAdversary`` to craft new
    samples from the latest batch, extends the transfer set, and retrains.
    The final parameters are written to ``model_dir/params_train.json``.

    All configuration comes from the command line; this function has no
    programmatic parameters and returns ``None``.
    """
    parser = argparse.ArgumentParser(description='Train a model')
    # Required arguments
    parser.add_argument('victim_model_dir', metavar='PATH', type=str,
                        help='Directory of Victim Blackbox')
    parser.add_argument('model_dir', metavar='DIR', type=str,
                        help='Directory containing transferset.pickle')
    parser.add_argument('model_arch', metavar='MODEL_ARCH', type=str,
                        help='Model name')
    parser.add_argument('testdataset', metavar='DS_NAME', type=str,
                        help='Name of test')
    parser.add_argument(
        '--budgets', metavar='B', type=str,
        help='Comma separated values of budgets. Knockoffs will be trained for each budget.')
    parser.add_argument(
        '--rounds', metavar='R', type=str,
        help='Comma seperates values of duplication rounds of each budget.')
    # Optional arguments
    parser.add_argument('-d', '--device_id', metavar='D', type=int,
                        help='Device id. -1 for CPU.', default=0)
    parser.add_argument('-b', '--batch-size', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('-e', '--epochs', type=int, default=100, metavar='N',
                        help='number of epochs to train (default: 100)')
    parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                        help='learning rate (default: 0.01)')
    parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                        help='SGD momentum (default: 0.5)')
    parser.add_argument('--init_alpha', type=float, default=1.0, metavar='I',
                        help='initial iteration step (default: 1.0)')
    parser.add_argument('--num_steps', type=int, default=80, metavar='I',
                        help='iteration steps of each crafted sample (default: 80)')
    parser.add_argument('--eps', type=float, default=255.0, metavar='E',
                        help='maximum change that can be done on a image. (default: 255.0)')
    parser.add_argument('--method', type=str, default='topk-IFGSMMod', metavar='METHOD',
                        help='direction_method-gradient_method')
    parser.add_argument('--directions', type=int, default=2, metavar='D',
                        help='directions')
    parser.add_argument('--max_pixel', type=float, default=1.0, metavar='P',
                        help='upper bound')
    parser.add_argument('--min_pixel', type=float, default=0.0, metavar='P',
                        help='lower bound')
    parser.add_argument('--log-interval', type=int, default=50, metavar='N',
                        help='how many batches to wait before logging training status')
    parser.add_argument('--resume', default=None, type=str, metavar='PATH',
                        help='path to latest checkpoint (default: none)')
    parser.add_argument('--lr-step', type=int, default=60, metavar='N',
                        help='Step sizes for LR')
    parser.add_argument('--lr-gamma', type=float, default=0.1, metavar='N',
                        help='LR Decay Rate')
    parser.add_argument('-w', '--num_workers', metavar='N', type=int,
                        help='# Worker threads to load data', default=10)
    parser.add_argument('--pretrained', type=str,
                        help='Use pretrained network', default=None)
    parser.add_argument('--weighted-loss', action='store_true',
                        help='Use a weighted loss', default=False)
    # Attacker's defense
    parser.add_argument('--argmaxed', action='store_true',
                        help='Only consider argmax labels', default=False)
    parser.add_argument('--optimizer_choice', type=str, help='Optimizer',
                        default='sgdm', choices=('sgd', 'sgdm', 'adam', 'adagrad'))
    args = parser.parse_args()
    params = vars(args)

    torch.manual_seed(cfg.DEFAULT_SEED)
    if params['device_id'] >= 0:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(params['device_id'])
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    # The `train` module holds training state as module-level attributes.
    train.model_dir = params['model_dir']

    # ----------- Set up transferset
    transferset_path = osp.join(train.model_dir, 'transferset.pickle')
    with open(transferset_path, 'rb') as rf:
        # NOTE: unpickling is only safe because the transferset is produced
        # locally by this project, not received from an untrusted source.
        transferset_samples = pickle.load(rf)
    # Each sample is (input, posterior-vector); class count comes from the vector.
    num_classes = transferset_samples[0][1].size(0)
    print('=> found transfer set with {} samples, {} classes'.format(
        len(transferset_samples), num_classes))

    # ----------- Clean up transfer (if necessary)
    if params['argmaxed']:
        # Replace soft posterior labels with one-hot argmax labels.
        new_transferset_samples = []
        print('=> Using argmax labels (instead of posterior probabilities)')
        for i in range(len(transferset_samples)):
            x_i, y_i = transferset_samples[i]
            argmax_k = y_i.argmax()
            y_i_1hot = torch.zeros_like(y_i)
            y_i_1hot[argmax_k] = 1.
            new_transferset_samples.append((x_i, y_i_1hot))
        transferset_samples = new_transferset_samples

    # ----------- Set up testset
    dataset_name = params['testdataset']
    valid_datasets = datasets.__dict__.keys()
    # Validate BEFORE indexing the lookup tables, so a bad name raises the
    # intended ValueError instead of an opaque KeyError.
    if dataset_name not in valid_datasets:
        raise ValueError(
            'Dataset not found. Valid arguments = {}'.format(valid_datasets))
    modelfamily = datasets.dataset_to_modelfamily[dataset_name]
    transform = datasets.modelfamily_to_transforms[modelfamily]['test']
    dataset = datasets.__dict__[dataset_name]
    testset = dataset(train=False, transform=transform)
    if len(testset.classes) != num_classes:
        raise ValueError(
            '# Transfer classes ({}) != # Testset classes ({})'.format(
                num_classes, len(testset.classes)))

    # ----------- Set up model
    model_name = params['model_arch']
    pretrained = params['pretrained']
    model = zoo.get_net(model_name, modelfamily, pretrained,
                        num_classes=num_classes)
    model = model.to(device)

    # ----------- Initialize blackbox
    blackbox_dir = params['victim_model_dir']
    blackbox = Blackbox.from_modeldir(blackbox_dir, device)

    # ----------- Set up train params
    budgets = [int(b) for b in params['budgets'].split(',')]
    rounds = [int(r) for r in params['rounds'].split(',')]
    # Re-seed so that per-budget training runs are reproducible from here on.
    np.random.seed(cfg.DEFAULT_SEED)
    torch.manual_seed(cfg.DEFAULT_SEED)
    torch.cuda.manual_seed(cfg.DEFAULT_SEED)
    train.optimizer = get_optimizer(model.parameters(),
                                    params['optimizer_choice'], **params)
    train.criterion_train = model_utils.soft_cross_entropy
    train.params = params
    train.device = device
    train.testset = testset
    print(params)

    # Set up crafter params
    original_samples = transferset_samples[:]
    adversary = SyntheticAdversary(blackbox=blackbox, classifier=model,
                                   device=device, **params)
    for b, n_rounds in zip(budgets, rounds):
        # Round 1: train on the unmodified transfer set (skipped when resuming
        # from a pretrained checkpoint).
        if params['pretrained'] is None:
            train(model, original_samples, b, 1)
        total_samples = transferset_samples
        latest_samples = random.sample(total_samples, b)
        # Rounds 2..n_rounds: synthesize from the latest batch, grow the pool,
        # retrain, and resample the next batch.  (The inner loop variable was
        # renamed from `r`, which shadowed the outer loop's round count.)
        for rnd in range(2, n_rounds + 1):
            latest_samples = adversary.synthesize(latest_samples)
            transferset_samples = original_samples[:]
            transferset_samples.extend(latest_samples)
            total_samples.extend(latest_samples)
            train(model, transferset_samples, b, rnd)
            latest_samples = random.sample(total_samples, b)

    # Store arguments
    params['created_on'] = str(datetime.now())
    params_out_path = osp.join(train.model_dir, 'params_train.json')
    with open(params_out_path, 'w') as jf:
        json.dump(params, jf, indent=True)
def main():
    """Train a knockoff model with a policy-gradient (RL) query adversary.

    A ``PGAdversary`` policy selects query samples from a query set; the victim
    blackbox labels them, forming trajectories that both train the policy
    (REINFORCE-style, via a replay buffer) and grow the transfer set used to
    train the adversary's surrogate model.  Per-iteration metrics, checkpoints
    and the final arguments are written under ``model_dir``.

    All configuration comes from the command line; this function has no
    programmatic parameters and returns ``None``.
    """
    parser = argparse.ArgumentParser(description='Train a model')
    # Required arguments
    parser.add_argument('model_dir', metavar='DIR', type=str,
                        help='Directory containing transferset.pickle')
    parser.add_argument('model_arch', metavar='MODEL_ARCH', type=str,
                        help='Model name')
    parser.add_argument('testdataset', metavar='DS_NAME', type=str,
                        help='Name of test')
    parser.add_argument('--budgets', metavar='B', type=int,
                        help='Knockoffs will be trained for budget.')
    # Optional arguments
    parser.add_argument('-d', '--device_id', metavar='D', type=int,
                        help='Device id. -1 for CPU.', default=0)
    parser.add_argument('-b', '--batch-size', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('-e', '--epochs', type=int, default=10, metavar='N',
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                        help='learning rate (default: 0.01)')
    parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                        help='SGD momentum (default: 0.5)')
    parser.add_argument('--log-interval', type=int, default=50, metavar='N',
                        help='how many batches to wait before logging training status')
    parser.add_argument('--resume', default=None, type=str, metavar='PATH',
                        help='path to latest checkpoint (default: none)')
    parser.add_argument('--lr-step', type=int, default=60, metavar='N',
                        help='Step sizes for LR')
    parser.add_argument('--lr-gamma', type=float, default=0.1, metavar='N',
                        help='LR Decay Rate')
    parser.add_argument('-w', '--num_workers', metavar='N', type=int,
                        help='# Worker threads to load data', default=10)
    parser.add_argument('--pretrained', type=str,
                        help='Use pretrained network', default=None)
    parser.add_argument('--weighted-loss', action='store_true',
                        help='Use a weighted loss', default=False)
    # RL arguments
    parser.add_argument('--traj_length', metavar='N', type=int,
                        help='# Step in one trajactory', default=10)
    parser.add_argument('--num_each_class', metavar='N', type=int,
                        help='# sample in each class', default=1)
    parser.add_argument('--n_iter', metavar='N', type=int,
                        help='# iterations of RL training', default=10)
    parser.add_argument('--n_traj_each_iter', metavar='N', type=int,
                        help='# trajactories / iter', default=10)
    parser.add_argument('--queryset', metavar='DS_NAME', type=str,
                        help='Name of test')
    parser.add_argument('--victim_model_dir', default=None, type=str, metavar='PATH',
                        help='path to latest checkpoint (default: none)')
    parser.add_argument('--n_layers', metavar='N', type=int,
                        help='# layers in policy', default=4)
    parser.add_argument('--size', metavar='N', type=int,
                        help='size of layer in policy', default=64)
    parser.add_argument('--policy_lr', type=float, default=1e-4, metavar='N',
                        help='Policy learning rate')
    parser.add_argument('--num_agent_train_steps_per_iter', metavar='N', type=int,
                        help='num_agent_train_steps_per_iter', default=10)
    parser.add_argument('--agent_train_batch_size', metavar='N', type=int,
                        help='num_agent_train_steps_per_iter', default=990)
    parser.add_argument('--policy_gamma', type=float, default=0.9, metavar='N',
                        help='reward discounting')
    parser.add_argument('--eps_random', type=float, default=-1, metavar='N',
                        help='eps random exploration')
    parser.add_argument('--nn_baseline', action='store_true',
                        help='Use nn baseline', default=False)
    # Attacker's defense
    parser.add_argument('--argmaxed', action='store_true',
                        help='Only consider argmax labels', default=False)
    parser.add_argument('--optimizer_choice', type=str, help='Optimizer',
                        default='sgdm', choices=('sgd', 'sgdm', 'adam', 'adagrad'))
    args = parser.parse_args()
    params = vars(args)

    torch.manual_seed(cfg.DEFAULT_SEED)
    if params['device_id'] >= 0:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(params['device_id'])
        device = torch.device('cuda')
        ptu.init_gpu()
    else:
        device = torch.device('cpu')
    model_dir = params['model_dir']

    # ----------- Set up testset
    dataset_name = params['testdataset']
    valid_datasets = datasets.__dict__.keys()
    # Validate BEFORE indexing the lookup tables, so a bad name raises the
    # intended ValueError instead of an opaque KeyError.
    if dataset_name not in valid_datasets:
        raise ValueError(
            'Dataset not found. Valid arguments = {}'.format(valid_datasets))
    modelfamily = datasets.dataset_to_modelfamily[dataset_name]
    transform = datasets.modelfamily_to_transforms[modelfamily]['test']
    dataset = datasets.__dict__[dataset_name]
    testset = dataset(train=False, transform=transform)
    # No transfer/testset class-count consistency check here (num_classes is
    # derived from the queryset below).

    # ----------- Set up queryset
    queryset_name = params['queryset']
    valid_datasets = datasets.__dict__.keys()
    if queryset_name not in valid_datasets:
        raise ValueError(
            'Dataset not found. Valid arguments = {}'.format(valid_datasets))
    modelfamily = datasets.dataset_to_modelfamily[queryset_name]
    transform = datasets.modelfamily_to_transforms[modelfamily]['train']
    # Some dataset constructors take train=True, others take split="train".
    # Narrowed from a bare `except:` so real failures are not silently retried.
    try:
        queryset = datasets.__dict__[queryset_name](train=True,
                                                    transform=transform)
    except TypeError:
        queryset = datasets.__dict__[queryset_name](split="train",
                                                    transform=transform)
    num_classes = len(queryset.classes)

    # ----------- Initialize blackbox
    blackbox_dir = params['victim_model_dir']
    blackbox = Blackbox.from_modeldir(blackbox_dir, device)

    # ----------- Set up adversary model
    model_name = params['model_arch']
    pretrained = params['pretrained']
    # NOTE(review): output size is hard-coded to 10 rather than using the
    # computed num_classes — looks suspicious; confirm against the victim's
    # class count before generalizing.
    adv_model = zoo.get_net(model_name, modelfamily, pretrained, num_classes=10)
    adv_model = adv_model.to(device)

    # ----------- Initialize adversary
    num_each_class = params['num_each_class']
    agent_params = {
        "ac_dim": num_classes,
        "ob_dim": len(testset.classes),
        "n_layers": params["n_layers"],
        "size": params["size"],
        "discrete": True,
        "learning_rate": params["policy_lr"],
        "num_agent_train_steps_per_iter": params["num_agent_train_steps_per_iter"],
        "agent_train_batch_size": params["agent_train_batch_size"],
        "gamma": params["policy_gamma"],
        "reward_to_go": True,
        "nn_baseline": params["nn_baseline"],
        "standardize_advantages": True,
        "eps_random": params["eps_random"]
    }
    adversary = PGAdversary(queryset, num_each_class, agent_params)

    # ----------- Set up transferset
    def collect_training_trajectories(length, n_traj=10):
        """Roll out `n_traj` trajectories of `length` queries against the
        blackbox; returns (stacked queries X, stacked blackbox outputs Y,
        list of RL paths) and appends running reward stats to the enclosing
        avg_rewards / avg_components."""
        nonlocal avg_rewards, avg_components
        paths = []
        mean_rew = 0
        mean_cert = mean_L = mean_E = mean_div = 0
        X_paths, Y_paths = [], []
        for _ in range(n_traj):
            obs, acs, rewards, next_obs = [], [], [], []
            r_certs, r_Ls, r_Es, r_divs = [], [], [], []
            X_path, Y_path = [], []
            # Initial query batch; the blackbox's posteriors are the observation.
            X, actions = adversary.init_sampling()
            X_path.append(X)
            ob = blackbox(X)
            Y_path.append(ob)
            ob = ob.numpy()
            for t in range(length - 1):
                with torch.no_grad():
                    # Observe and react
                    obs.append(ob)
                    X_new, actions = adversary.sample(ob)
                    X_path.append(X_new)
                    acs.append(actions)
                    # Env gives feedback, which is a new observation
                    X_new = X_new.to(device)
                    ob = blackbox(X_new)
                    Y_path.append(ob)
                    ob = ob.cpu().numpy()
                    next_obs.append(ob)
                    # Reward compares blackbox output against the current
                    # surrogate's prediction (plus diversity/entropy terms).
                    Y_adv = adv_model(X_new)
                    Y_adv = F.softmax(Y_adv, dim=1).cpu().numpy()
                    reward, r_cert, r_L, r_E, r_div = adversary.agent.calculate_reward(
                        ob, np.concatenate(acs), Y_adv)
                    rewards.append(reward)
                    r_certs.append(r_cert)
                    r_Ls.append(r_L)
                    r_Es.append(r_E)
                    r_divs.append(r_div)
            obs = np.concatenate(obs)
            acs = np.concatenate(acs)
            rewards = np.concatenate(rewards)
            mean_rew += np.mean(rewards)
            mean_cert += np.mean(np.concatenate(r_certs))
            mean_L += np.mean(np.concatenate(r_Ls))
            mean_E += np.mean(np.array(r_Es))
            mean_div += np.mean(np.array(r_divs))
            next_obs = np.concatenate(next_obs)
            path = {
                "observation": obs,
                "action": acs,
                "reward": rewards,
                "next_observation": next_obs
            }
            paths.append(path)
            X_paths.append(torch.cat(X_path))
            Y_paths.append(torch.cat(Y_path))
        print(f"==> Avg reward: {mean_rew / n_traj}")
        avg_rewards.append(mean_rew / n_traj)
        avg_components["avg_cert"].append(mean_cert / n_traj)
        avg_components["avg_L"].append(mean_L / n_traj)
        avg_components["avg_E"].append(mean_E / n_traj)
        avg_components["avg_div"].append(mean_div / n_traj)
        return torch.cat(X_paths), torch.cat(Y_paths), paths

    traj_length = params['traj_length']
    num_each_class = params['num_each_class']
    n_iter = params['n_iter']
    X, Y = None, None
    budgets = params['budgets']
    n_traj = params['n_traj_each_iter']
    criterion_train = model_utils.soft_cross_entropy
    if traj_length > 0:
        # Derive the iteration count from the query budget.
        n_iter = budgets // (traj_length * n_traj)
    print(f"==> Budget = {n_iter} x {traj_length} x {n_traj}")
    best_test_acc = []
    best_acc = -1
    avg_rewards = []
    avg_components = collections.defaultdict(list)
    # Loop variable renamed from `iter`, which shadowed the builtin.
    for iteration in range(1, n_iter + 1):
        # n_iter * traj_length = budget
        print(f"==> Iteration: {iteration}/{n_iter}")
        X_path, Y_path, paths = collect_training_trajectories(traj_length,
                                                              n_traj=n_traj)
        adversary.add_to_replay_buffer(paths)
        adversary.train_agent()
        # Accumulate all queried samples into the growing transfer set.
        if X is None:
            X, Y = X_path, Y_path
        else:
            X = torch.cat((X, X_path))
            Y = torch.cat((Y, Y_path))
        transferset = ImageTensorSet((X, Y))
        # ----------- Train
        optimizer = get_optimizer(adv_model.parameters(),
                                  params['optimizer_choice'], **params)
        print(f"Train on {len(transferset)} samples")
        # NOTE(review): literal braces — possibly a missing f-string/.format;
        # kept byte-identical, confirm intended checkpoint naming.
        checkpoint_suffix = '.{extraction}'
        best_acc = model_utils.train_model(adv_model, transferset, model_dir,
                                           testset=testset,
                                           criterion_train=criterion_train,
                                           checkpoint_suffix=checkpoint_suffix,
                                           device=device,
                                           optimizer=optimizer,
                                           benchmark=best_acc,
                                           **params)
        best_test_acc.append(best_acc)
        adversary.agent.actor.save(
            osp.join(model_dir, "checkpoint.agent.state_dict"))
        # ----------- Log (checkpointed every iteration so progress survives
        # an interrupted run)
        torch.save(best_test_acc, osp.join(model_dir, "best_acc.pylist"))
        torch.save(avg_rewards, osp.join(model_dir, "avg_rewards.pylist"))
        torch.save(avg_components, osp.join(model_dir, "avg_components.pydict"))
        torch.save(adversary.idx_counter, osp.join(model_dir, "idx_counter.pydict"))
        torch.save(transferset, osp.join(model_dir, "transferset.pt"))

    # Store arguments
    params['created_on'] = str(datetime.now())
    params_out_path = osp.join(model_dir, 'params_train.json')
    with open(params_out_path, 'w') as jf:
        json.dump(params, jf, indent=True)
    agent_params_out_path = osp.join(model_dir, 'agent_params_train.json')
    with open(agent_params_out_path, 'w') as jf:
        json.dump(agent_params, jf, indent=True)