Example #1
    def get_transferset(self):
        """
        :return:
        """
        # for rho_current in range(self.rho):
        rho_current = 0
        model_adv = None  # Guard: stays None if the query budget is already exhausted
        while self.blackbox.call_count < self.budget:
            print('=> Beginning substitute epoch {} (|D| = {})'.format(rho_current, len(self.D)))
            # -------------------------- 0. Initialize Model
            model_adv = zoo.get_net(self.model_adv_name, self.modelfamily, self.model_adv_pretrained,
                                    num_classes=self.num_classes)
            model_adv = model_adv.to(self.device)

            # -------------------------- 1. Train model on D
            model_adv = model_utils.train_model(model_adv, self.D, self.out_dir, num_workers=10,
                                                checkpoint_suffix='.{}'.format(self.blackbox.call_count),
                                                device=self.device, epochs=self.train_epochs, log_interval=500, lr=0.1,
                                                momentum=0.9, batch_size=self.batch_size, lr_gamma=0.1,
                                                testset=self.testset, criterion_train=model_utils.soft_cross_entropy)

            # -------------------------- 2. Evaluate model
            # _, acc = model_utils.test_step(model_adv, self.testloader, nn.CrossEntropyLoss(reduction='mean'),
            #                                device=self.device, epoch=rho_current)
            # self.accuracies.append(acc)

            # -------------------------- 3. Jacobian-based data augmentation
            if self.aug_strategy in ['jbda', 'jbself']:
                self.D = self.jacobian_augmentation(model_adv, rho_current)
            elif self.aug_strategy == 'jbtop{}'.format(self.topk):
                self.D = self.jacobian_augmentation_topk(model_adv, rho_current)
            else:
                raise ValueError('Unrecognized augmentation strategy: "{}"'.format(self.aug_strategy))

            # -------------------------- 4. End if necessary
            rho_current += 1
            if (self.blackbox.call_count >= self.budget) or ((self.rho is not None) and (rho_current >= self.rho)):
                print('=> Budget/round limit reached (# BB queries = {}, budget = {}, round = {}). '
                      'Training final model and ending attack.'.format(self.blackbox.call_count, self.budget,
                                                                       rho_current))
                model_adv = zoo.get_net(self.model_adv_name, self.modelfamily, self.model_adv_pretrained,
                                        num_classes=self.num_classes)
                model_adv = model_adv.to(self.device)
                model_adv = model_utils.train_model(model_adv, self.D, self.out_dir, num_workers=10,
                                                    checkpoint_suffix='.{}'.format(self.blackbox.call_count),
                                                    device=self.device, epochs=self.final_train_epochs,
                                                    log_interval=500, lr=0.01, momentum=0.9, batch_size=self.batch_size,
                                                    lr_gamma=0.1, testset=self.testset,
                                                    criterion_train=model_utils.soft_cross_entropy)
                break

            print()

        return self.D, model_adv
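For reference, model_utils.soft_cross_entropy (passed as criterion_train above) is not shown in this example. A minimal sketch consistent with its usage, cross-entropy computed against soft probability-vector targets, is below; the repository's exact implementation may differ in details such as the optional weighting.

import torch
import torch.nn.functional as F

def soft_cross_entropy(pred, soft_targets, weights=None):
    """Cross-entropy against soft targets: batch mean of -sum_k q_k * log p_k."""
    log_probs = F.log_softmax(pred, dim=1)
    if weights is not None:
        return torch.mean(torch.sum(-soft_targets * weights * log_probs, dim=1))
    return torch.mean(torch.sum(-soft_targets * log_probs, dim=1))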
Example #2
    def from_modeldir(cls, model_dir, vocab_size, num_classes, embed_dim, device=None, output_type='probs'):
        device = torch.device('cuda') if device is None else device

        # What was the model architecture used by this model?
        params_path = osp.join(model_dir, 'params.json')
        with open(params_path) as jf:
            params = json.load(jf)
        model_arch = params['model_arch']
        # num_classes = params['num_classes']
        victim_dataset = params.get('dataset', 'imagenet')
        modelfamily = datasets.dataset_to_modelfamily[victim_dataset]

        # Instantiate the model
        # model = model_utils.get_net(model_arch, n_output_classes=num_classes)
        model = zoo.get_net(model_arch, modelfamily, pretrained=None, vocab_size=vocab_size, num_class=num_classes, embed_dim=embed_dim)
        model = model.to(device)

        # Load weights
        checkpoint_path = osp.join(model_dir, 'model_best.pth.tar')
        if not osp.exists(checkpoint_path):
            checkpoint_path = osp.join(model_dir, 'checkpoint.pth.tar')
        print("=> loading checkpoint '{}'".format(checkpoint_path))
        checkpoint = torch.load(checkpoint_path, map_location=device)  # map_location lets GPU checkpoints load on CPU-only machines
        epoch = checkpoint['epoch']
        best_test_acc = checkpoint['best_acc']
        model.load_state_dict(checkpoint['state_dict'])
        print("=> loaded checkpoint (epoch {}, acc={:.2f})".format(epoch, best_test_acc))

        blackbox = cls(model, device, output_type)
        return blackbox
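A hypothetical call site for this text-model loader (the directory name, vocabulary size and embedding dimension below are illustrative placeholders, not values from the source):

# Illustrative only: path and dimensions are placeholders
blackbox = Blackbox.from_modeldir('models/victim/ag_news', vocab_size=95812,
                                  num_classes=4, embed_dim=32,
                                  output_type='probs')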
Example #3
    def from_modeldir(cls,
                      model_dir,
                      device=None,
                      output_type='probs',
                      **kwargs):
        device = torch.device('cuda') if device is None else device

        # What was the model architecture used by this model?
        params_path = osp.join(model_dir, 'params.json')
        with open(params_path) as jf:
            params = json.load(jf)
        model_arch = params['model_arch']
        num_classes = params['num_classes']
        if 'queryset' in params:
            dataset_name = params['queryset']
        elif 'testdataset' in params:
            dataset_name = params['testdataset']
        elif 'dataset' in params:
            dataset_name = params['dataset']
        else:
            raise ValueError('Unable to determine model family')
        modelfamily = datasets.dataset_to_modelfamily[dataset_name]

        # Instantiate the model
        model = zoo.get_net(model_arch, modelfamily, num_classes=num_classes)
        model = model.to(device)

        # Load weights
        checkpoint_path = osp.join(model_dir, 'model_best.pth.tar')
        if not osp.exists(checkpoint_path):
            checkpoint_path = osp.join(model_dir, 'checkpoint.pth.tar')
        print("=> loading checkpoint '{}'".format(checkpoint_path))
        checkpoint = torch.load(checkpoint_path, map_location=device)
        epoch = checkpoint['epoch']
        best_test_acc = checkpoint['best_acc']
        model.load_state_dict(checkpoint['state_dict'])
        print("=> loaded checkpoint (epoch {}, acc={:.2f})".format(
            epoch, best_test_acc))

        # print(cls, model, device, output_type, kwargs)
        blackbox = cls(model=model,
                       device=device,
                       output_type=output_type,
                       dataset_name=dataset_name,
                       modelfamily=modelfamily,
                       model_arch=model_arch,
                       num_classes=num_classes,
                       model_dir=model_dir,
                       **kwargs)
        return blackbox
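Both loaders above assume a params.json written at training time by the training scripts later in this section. A minimal dictionary satisfying this loader, with keys inferred from the reads above and purely illustrative values:

# Keys read by from_modeldir; values are placeholders
params = {
    'model_arch': 'resnet34',   # architecture name passed to zoo.get_net
    'num_classes': 10,          # output dimensionality
    'dataset': 'CIFAR10',       # one of 'queryset'/'testdataset'/'dataset' must be present
}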
Example #4
    def __init__(self,
                 epsilon=None,
                 optim='linesearch',
                 model_adv_proxy=None,
                 max_grad_layer=None,
                 ydist='l1',
                 oracle='extreme',
                 disable_jacobian=False,
                 objmax=False,
                 out_path=None,
                 log_prefix='',
                 *args,
                 **kwargs):
        super().__init__(*args, **kwargs)
        print('=> MAD ({})'.format(
            [self.dataset_name, epsilon, optim, ydist, oracle]))

        self.epsilon = epsilon
        self.out_path = out_path
        self.disable_jacobian = bool(disable_jacobian)
        if self.disable_jacobian:
            print('')
            print('!!!WARNING!!! Using G = eye(K)')
            print('')

        self.objmax = bool(objmax)

        # Victim's assumption of adversary's model
        print('Proxy for F_A = ', model_adv_proxy)
        if model_adv_proxy is not None:
            # Fall back to the victim's architecture when a checkpoint file is
            # passed directly (params.json is only read from a directory);
            # assumes super() has set self.model_arch
            model_adv_proxy_arch = self.model_arch
            if osp.isdir(model_adv_proxy):
                model_adv_proxy_params = osp.join(model_adv_proxy,
                                                  'params.json')
                model_adv_proxy = osp.join(model_adv_proxy,
                                           'checkpoint.pth.tar')
                with open(model_adv_proxy_params, 'r') as rf:
                    proxy_params = json.load(rf)
                    model_adv_proxy_arch = proxy_params['model_arch']
                print('Loading proxy ({}) parameters: {}'.format(
                    model_adv_proxy_arch, model_adv_proxy))
            assert osp.exists(model_adv_proxy), 'Does not exist: {}'.format(
                model_adv_proxy)
            self.model_adv_proxy = zoo.get_net(model_adv_proxy_arch,
                                               self.modelfamily,
                                               pretrained=model_adv_proxy,
                                               num_classes=self.num_classes)
            self.model_adv_proxy = self.model_adv_proxy.to(self.device)
        else:
            self.model_adv_proxy = self.model

        # To compute stats
        self.dataset = datasets.__dict__[self.dataset_name]
        self.modelfamily = datasets.dataset_to_modelfamily[self.dataset_name]
        self.train_transform = datasets.modelfamily_to_transforms[
            self.modelfamily]['train']
        self.test_transform = datasets.modelfamily_to_transforms[
            self.modelfamily]['test']
        self.testset = self.dataset(train=False, transform=self.test_transform)

        self.K = len(self.testset.classes)
        self.D = None

        self.ydist = ydist
        assert ydist in ['l1', 'l2', 'kl']

        # Which oracle to use
        self.oracle = oracle
        assert self.oracle in ['extreme', 'random', 'argmin', 'argmax']

        # Which algorithm to use to optimize
        self.optim = optim
        assert optim in ['linesearch', 'projections', 'greedy']

        # Gradients from which layer to use?
        assert max_grad_layer in [None, 'all']
        self.max_grad_layer = max_grad_layer

        # Track some data for debugging
        self.queries = []  # List of (x_i, y_i, y_i_prime, distance)
        self.log_path = osp.join(out_path,
                                 'distance{}.log.tsv'.format(log_prefix))
        if not osp.exists(self.log_path):
            with open(self.log_path, 'w') as wf:
                columns = [
                    'call_count', 'l1_mean', 'l1_std', 'l2_mean', 'l2_std',
                    'kl_mean', 'kl_std'
                ]
                wf.write('\t'.join(columns) + '\n')

        self.jacobian_times = []
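The distance log above records mean/std of three distances between clean and perturbed posteriors, matching the supported ydist choices ('l1', 'l2', 'kl'). A sketch of how those columns could be computed; the helper name is mine, not the source's:

import numpy as np

def ydist_stats(y, y_prime, eps=1e-12):
    """y, y_prime: (N, K) arrays of clean/perturbed posteriors.
    Returns [l1_mean, l1_std, l2_mean, l2_std, kl_mean, kl_std]."""
    l1 = np.abs(y - y_prime).sum(axis=1)
    l2 = np.sqrt(((y - y_prime) ** 2).sum(axis=1))
    kl = (y * (np.log(y + eps) - np.log(y_prime + eps))).sum(axis=1)
    return [l1.mean(), l1.std(), l2.mean(), l2.std(), kl.mean(), kl.std()]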
Example #5
    def __init__(self, epsilon=None, optim='linesearch', model_adv_proxy=None, max_grad_layer=None, ydist='l1',
                 oracle='extreme', model_adv=None, model_adv_proxy_notrain=False, out_path=None, disable_jacobian=False,
                 attacker_argmax=False, adv_optimizer='sgd', objmax=False, log_prefix='', *args, **kwargs):
        super().__init__(*args, **kwargs)
        print('=> MAD ({})'.format([self.dataset_name, epsilon, optim, ydist, oracle]))

        self.epsilon = epsilon
        self.out_path = out_path
        self.disable_jacobian = bool(disable_jacobian)
        if self.disable_jacobian:
            print('')
            print('!!!WARNING!!! Using G = eye(K)')
            print('')

        self.attacker_argmax = bool(attacker_argmax)
        if self.attacker_argmax:
            print('')
            print('!!!WARNING!!! Argmax of perturbed probabilities used to train attacker model')
            print('')

        self.objmax = bool(objmax)

        '''
            Here, we refer to three models:
            a) self.model: Victim's model (already initialized by super)
            b) self.model_adv: Adversary's model
            c) self.model_adv_proxy: Proxy to adversary's model
        '''

        '''(b) self.model_adv: Adversary's model '''
        # Unlike BreakSGD, here we assume a perfect-knowledge adversary - which will also be trained online
        # Initialize the adv model
        print('=> Initializing adv model compatible with: ', self.model_arch, self.modelfamily, self.dataset_name,
              self.num_classes)
        if model_adv is not None and osp.isdir(model_adv):
            model_adv = osp.join(model_adv, 'checkpoint.pth.tar')
        assert model_adv is None or osp.exists(model_adv)
        self.model_adv = zoo.get_net(self.model_arch, self.modelfamily, pretrained=model_adv,
                                     num_classes=self.num_classes)
        self.adv_optimizer = adv_optimizer
        assert adv_optimizer in ['sgd', 'sgdm', 'adam']
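        # NOTE (assumption): the learning rates below are scaled by 1/64,
        # presumably so per-query online updates match an effective batch size of 64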
        if self.adv_optimizer == 'sgd':
            self.model_adv_optimizer = torch.optim.SGD(self.model_adv.parameters(), lr=0.1 / 64)
        elif self.adv_optimizer == 'sgdm':
            self.model_adv_optimizer = torch.optim.SGD(self.model_adv.parameters(), lr=0.1 / 64, momentum=0.5)
        elif self.adv_optimizer == 'adam':
            self.model_adv_optimizer = torch.optim.Adam(self.model_adv.parameters(), lr=0.001 / 64)
        else:
            raise ValueError('Unrecognized optimizer')
        self.model_adv = self.model_adv.to(self.device)
        self.model_adv.train()

        '''(c) self.model_adv_proxy: Proxy to adversary's model'''
        self.model_adv_proxy_notrain = bool(model_adv_proxy_notrain)
        if model_adv_proxy is None:
            # Perfect Knowledge (White-box attacker)
            self.model_adv_proxy = self.model_adv
        else:
            if osp.isdir(model_adv_proxy):
                model_adv_proxy = osp.join(model_adv_proxy, 'checkpoint.pth.tar')
            assert osp.exists(model_adv_proxy), 'Does not exist: {}'.format(model_adv_proxy)
            print('=== Models used for experiment ===')
            print('F_V        : ', self.model_dir)
            print('F_A        : ', osp.dirname(model_adv) if model_adv is not None else '(initialized from scratch)')
            print('F_A (proxy): ', osp.dirname(model_adv_proxy))
            print('F_A (proxy) trained online?: ', not self.model_adv_proxy_notrain)
            print('==================================')
            # Gray-box attacker
            self.model_adv_proxy = zoo.get_net(self.model_arch, self.modelfamily, pretrained=model_adv_proxy,
                                               num_classes=self.num_classes)
            # Optimize the proxy's own parameters (not the adversary model's)
            if self.adv_optimizer == 'sgd':
                self.model_adv_proxy_optimizer = torch.optim.SGD(self.model_adv_proxy.parameters(), lr=0.1 / 64)
            elif self.adv_optimizer == 'sgdm':
                self.model_adv_proxy_optimizer = torch.optim.SGD(self.model_adv_proxy.parameters(), lr=0.1 / 64,
                                                                 momentum=0.5)
            elif self.adv_optimizer == 'adam':
                self.model_adv_proxy_optimizer = torch.optim.Adam(self.model_adv_proxy.parameters(), lr=0.001 / 64)
            else:
                raise ValueError('Unrecognized optimizer')
            self.model_adv_proxy = self.model_adv_proxy.to(self.device)

        # To compute stats
        self.dataset = datasets.__dict__[self.dataset_name]
        self.modelfamily = datasets.dataset_to_modelfamily[self.dataset_name]
        self.train_transform = datasets.modelfamily_to_transforms[self.modelfamily]['train']
        self.test_transform = datasets.modelfamily_to_transforms[self.modelfamily]['test']
        self.trainset = self.dataset(train=True, transform=self.train_transform)
        self.testset = self.dataset(train=False, transform=self.test_transform)
        self.test_loader = DataLoader(self.testset, batch_size=128, shuffle=False, num_workers=5)
        self.best_test_acc = 0.
        # Also keep a mini-testset to eval victim model using current strategy
        self.minitestset = self.dataset(train=False, transform=self.test_transform)
        self.minitestset = torch.utils.data.Subset(self.minitestset, indices=np.arange(1000))
        self.minitest_loader = DataLoader(self.minitestset, batch_size=1, shuffle=False, num_workers=1)

        self.K = len(self.testset.classes)
        self.D = None

        self.ydist = ydist
        assert ydist in ['l1', 'l2', 'kl']

        # Which oracle to use
        self.oracle = oracle
        assert self.oracle in ['extreme', 'random', 'argmin', 'argmax']

        # Which algorithm to use to optimize
        self.optim = optim
        assert optim in ['linesearch', 'projections', 'greedy']

        # Gradients from which layer to use?
        assert max_grad_layer in [None, 'all']
        self.max_grad_layer = max_grad_layer

        # Track some data for debugging
        self.queries = []  # List of (x_i, y_i, y_i_prime, distance)
        self.run_id = str(datetime.now())
        self.log_path = osp.join(out_path, 'online.log.tsv')
        if not osp.exists(self.log_path):
            with open(self.log_path, 'w') as wf:
                columns = ['run_id', 'epoch', 'split', 'loss', 'accuracy', 'best_accuracy', 'l1_mean', 'l1_std',
                           'l2_mean', 'l2_std', 'kl_mean', 'kl_std']
                wf.write('\t'.join(columns) + '\n')
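The sgd/sgdm/adam chain appears twice in this constructor (and again in the ReverseSigmoid defense below), which invites copy-paste errors such as an optimizer bound to the wrong model's parameters. A small factory, sketched here with assumed defaults, would remove the duplication:

import torch

def make_optimizer(params, name='sgd', base_lr=0.1, batch_size=64):
    """Build the per-sample-scaled optimizer used for online updates.
    The 1/batch_size scaling mirrors the hard-coded 0.1/64 above."""
    if name == 'sgd':
        return torch.optim.SGD(params, lr=base_lr / batch_size)
    if name == 'sgdm':
        return torch.optim.SGD(params, lr=base_lr / batch_size, momentum=0.5)
    if name == 'adam':
        return torch.optim.Adam(params, lr=0.001 / batch_size)
    raise ValueError('Unrecognized optimizer: {}'.format(name))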
Example #6
def main():
    parser = argparse.ArgumentParser(description='Train a model')
    # Required arguments
    parser.add_argument('dataset',
                        metavar='DS_NAME',
                        type=str,
                        help='Dataset name')
    parser.add_argument('model_arch',
                        metavar='MODEL_ARCH',
                        type=str,
                        help='Model name')
    # Optional arguments
    parser.add_argument('-o',
                        '--out_path',
                        metavar='PATH',
                        type=str,
                        help='Output path for model',
                        default=cfg.MODEL_DIR)
    parser.add_argument('-d',
                        '--device_id',
                        metavar='D',
                        type=int,
                        help='Device id. -1 for CPU.',
                        default=0)
    parser.add_argument('-b',
                        '--batch-size',
                        type=int,
                        default=64,
                        metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('-e',
                        '--epochs',
                        type=int,
                        default=100,
                        metavar='N',
                        help='number of epochs to train (default: 100)')
    parser.add_argument('--lr',
                        type=float,
                        default=0.1,
                        metavar='LR',
                        help='learning rate (default: 0.1)')
    parser.add_argument('--momentum',
                        type=float,
                        default=0.5,
                        metavar='M',
                        help='SGD momentum (default: 0.5)')
    parser.add_argument(
        '--log-interval',
        type=int,
        default=100,
        metavar='N',
        help='how many batches to wait before logging training status')
    parser.add_argument('--resume',
                        default=None,
                        type=str,
                        metavar='PATH',
                        help='path to latest checkpoint (default: none)')
    parser.add_argument('--lr-step',
                        type=int,
                        default=30,
                        metavar='N',
                        help='Step sizes for LR')
    parser.add_argument('--lr-gamma',
                        type=float,
                        default=0.1,
                        metavar='N',
                        help='LR Decay Rate')
    parser.add_argument('-w',
                        '--num_workers',
                        metavar='N',
                        type=int,
                        help='# Worker threads to load data',
                        default=10)
    parser.add_argument('--train_subset',
                        type=int,
                        help='Use a subset of train set',
                        default=None)
    parser.add_argument('--pretrained',
                        type=str,
                        help='Use pretrained network',
                        default=None)
    parser.add_argument('--weighted-loss',
                        action='store_true',
                        help='Use a weighted loss',
                        default=None)
    args = parser.parse_args()
    params = vars(args)

    # torch.manual_seed(cfg.DEFAULT_SEED)
    if params['device_id'] >= 0:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(params['device_id'])
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    # ----------- Set up dataset
    dataset_name = params['dataset']
    valid_datasets = datasets.__dict__.keys()
    if dataset_name not in valid_datasets:
        raise ValueError(
            'Dataset not found. Valid arguments = {}'.format(valid_datasets))
    dataset = datasets.__dict__[dataset_name]

    modelfamily = datasets.dataset_to_modelfamily[dataset_name]
    train_transform = datasets.modelfamily_to_transforms[modelfamily]['train']
    test_transform = datasets.modelfamily_to_transforms[modelfamily]['test']
    trainset = dataset(train=True, transform=train_transform)
    testset = dataset(train=False, transform=test_transform)
    num_classes = len(trainset.classes)
    params['num_classes'] = num_classes

    if params['train_subset'] is not None:
        idxs = np.arange(len(trainset))
        ntrainsubset = params['train_subset']
        idxs = np.random.choice(idxs, size=ntrainsubset, replace=False)
        trainset = Subset(trainset, idxs)

    # ----------- Set up model
    model_name = params['model_arch']
    pretrained = params['pretrained']
    # model = model_utils.get_net(model_name, n_output_classes=num_classes, pretrained=pretrained)
    model = zoo.get_net(model_name,
                        modelfamily,
                        pretrained,
                        num_classes=num_classes)
    model = model.to(device)

    # ----------- Train
    out_path = params['out_path']
    model_utils.train_model(model,
                            trainset,
                            testset=testset,
                            device=device,
                            **params)

    # Store arguments
    params['created_on'] = str(datetime.now())
    params_out_path = osp.join(out_path, 'params.json')
    with open(params_out_path, 'w') as jf:
        json.dump(params, jf, indent=True)
Example #7
    def __init__(self,
                 beta=1.0,
                 gamma=1.0,
                 out_path=None,
                 model_adv=None,
                 attacker_argmax=False,
                 adv_optimizer='sgd',
                 log_prefix='',
                 *args,
                 **kwargs):
        super().__init__(*args, **kwargs)
        print('=> ReverseSigmoid ({})'.format([beta, gamma]))

        assert beta >= 0.
        assert gamma >= 0.

        self.beta = beta
        self.gamma = gamma

        self.attacker_argmax = bool(attacker_argmax)
        if self.attacker_argmax:
            print('')
            print(
                '!!!WARNING!!! Argmax of perturbed probabilities used to train attacker model'
            )
            print('')
        ''' White-box specific stuff '''
        self.out_path = out_path

        # self.model_adv: Adversary's model
        # Initialize the adv model
        print('=> Initializing adv model compatible with: ', self.model_arch,
              self.modelfamily, self.dataset_name, self.num_classes)
        if model_adv is not None and osp.isdir(model_adv):
            model_adv = osp.join(model_adv, 'checkpoint.pth.tar')
        assert model_adv is None or osp.exists(model_adv)
        self.model_adv = zoo.get_net(self.model_arch,
                                     self.modelfamily,
                                     pretrained=model_adv,
                                     num_classes=self.num_classes)
        self.adv_optimizer = adv_optimizer
        assert adv_optimizer in ['sgd', 'sgdm', 'adam']
        if self.adv_optimizer == 'sgd':
            self.model_adv_optimizer = torch.optim.SGD(
                self.model_adv.parameters(), lr=0.1 / 64)
        elif self.adv_optimizer == 'sgdm':
            self.model_adv_optimizer = torch.optim.SGD(
                self.model_adv.parameters(), lr=0.1 / 64, momentum=0.5)
        elif self.adv_optimizer == 'adam':
            self.model_adv_optimizer = torch.optim.Adam(
                self.model_adv.parameters(), lr=0.001 / 64)
        else:
            raise ValueError('Unrecognized optimizer')
        self.model_adv = self.model_adv.to(self.device)
        self.model_adv.train()

        # To compute stats
        self.dataset = datasets.__dict__[self.dataset_name]
        self.modelfamily = datasets.dataset_to_modelfamily[self.dataset_name]
        self.train_transform = datasets.modelfamily_to_transforms[
            self.modelfamily]['train']
        self.test_transform = datasets.modelfamily_to_transforms[
            self.modelfamily]['test']
        self.trainset = self.dataset(train=True,
                                     transform=self.train_transform)
        self.testset = self.dataset(train=False, transform=self.test_transform)
        self.test_loader = DataLoader(self.testset,
                                      batch_size=128,
                                      shuffle=False,
                                      num_workers=5)
        self.best_test_acc = 0.
        # Also keep a mini-testset to eval victim model using current strategy
        self.minitestset = self.dataset(train=False,
                                        transform=self.test_transform)
        self.minitestset = torch.utils.data.Subset(self.minitestset,
                                                   indices=np.arange(1000))
        self.minitest_loader = DataLoader(self.minitestset,
                                          batch_size=1,
                                          shuffle=False,
                                          num_workers=1)

        # Track some data for debugging
        self.queries = []  # List of (x_i, y_i, y_i_prime, distance)
        self.run_id = str(datetime.now())
        self.log_path = osp.join(out_path, 'online.log.tsv')
        if not osp.exists(self.log_path):
            with open(self.log_path, 'w') as wf:
                columns = [
                    'run_id', 'epoch', 'split', 'loss', 'accuracy',
                    'best_accuracy', 'l1_mean', 'l1_std', 'l2_mean', 'l2_std',
                    'kl_mean', 'kl_std'
                ]
                wf.write('\t'.join(columns) + '\n')
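The perturbation itself is not shown in this constructor. The beta/gamma parameters suggest the reverse-sigmoid defense of Lee et al. (2018), under which the perturbed posterior could be computed roughly as follows. This is a sketch under that assumption; the clipping and renormalization details may differ from the source:

import torch

def reverse_sigmoid(y, beta, gamma, eps=1e-7):
    """Subtract beta * (sigmoid(gamma * logit(y)) - 0.5) from each posterior
    entry, then renormalize rows to sum to 1."""
    y = torch.clamp(y, eps, 1. - eps)
    logits = torch.log(y / (1. - y))                  # element-wise logit
    r = beta * (torch.sigmoid(gamma * logits) - 0.5)  # deceptive perturbation
    z = torch.clamp(y - r, min=0.)
    return z / z.sum(dim=1, keepdim=True)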
Example #8
def main():
    parser = argparse.ArgumentParser(description='Train a model')
    # Required arguments
    parser.add_argument('model_dir',
                        metavar='DIR',
                        type=str,
                        help='Directory containing transferset.pickle')
    parser.add_argument('model_arch',
                        metavar='MODEL_ARCH',
                        type=str,
                        help='Model name')
    parser.add_argument('testdataset',
                        metavar='DS_NAME',
                        type=str,
                        help='Name of test')
    parser.add_argument('-o', '--output', type=str, help="Output dir")
    parser.add_argument('--budget',
                        metavar='N',
                        type=int,
                        help='Size of transfer set to construct',
                        required=True)
    # Optional arguments
    parser.add_argument('-b',
                        '--batch-size',
                        type=int,
                        default=1024,
                        metavar='N',
                        help='input batch size for training (default: 1024)')
    parser.add_argument('--resume',
                        default=None,
                        type=str,
                        metavar='PATH',
                        help='path to latest checkpoint (default: none)')
    parser.add_argument('--device_id', default=0, type=int)
    parser.add_argument('-w',
                        '--num_workers',
                        metavar='N',
                        type=int,
                        help='# Worker threads to load data',
                        default=10)
    args = parser.parse_args()
    params = vars(args)

    random.seed(cfg.DEFAULT_SEED)
    np.random.seed(cfg.DEFAULT_SEED)
    torch.cuda.manual_seed(cfg.DEFAULT_SEED)
    torch.manual_seed(cfg.DEFAULT_SEED)
    if params['device_id'] >= 0:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(params['device_id'])
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    model_dir = params['model_dir']
    output_dir = params['output']
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # ----------- Set up transferset
    transferset_path = osp.join(model_dir,
                                f"transferset_{params['budget']}.pickle")
    with open(transferset_path, 'rb') as rf:
        transferset_samples = pickle.load(rf)
    num_classes = transferset_samples[0][1].size(0)
    print('=> found transfer set with {} samples, {} classes'.format(
        len(transferset_samples), num_classes))
    # Convert soft-label target vectors to one-hot vectors (required for the transferred resnet10)
    new_transferset_samples = []
    print('=> Using argmax labels (instead of posterior probabilities)')
    for i in range(len(transferset_samples)):
        x_i, y_i = transferset_samples[i]
        argmax_k = y_i.argmax()
        y_i_1hot = torch.zeros_like(y_i)
        y_i_1hot[argmax_k] = 1.
        new_transferset_samples.append((x_i, y_i_1hot))
    transferset_samples = new_transferset_samples

    if params['testdataset'] == 'CIFAR10':
        num_classes = 10
    # ----------- Set up testset
    dataset_name = params['testdataset']
    valid_datasets = datasets.__dict__.keys()
    if dataset_name not in valid_datasets:
        raise ValueError(
            'Dataset not found. Valid arguments = {}'.format(valid_datasets))
    modelfamily = datasets.dataset_to_modelfamily[dataset_name]
    transform = datasets.modelfamily_to_transforms[modelfamily]['test']
    dataset = datasets.__dict__[dataset_name]
    testset = dataset(train=False, transform=transform)
    if len(testset.classes) != num_classes:
        raise ValueError(
            '# Transfer classes ({}) != # Testset classes ({})'.format(
                num_classes, len(testset.classes)))

    # transferset = samples_to_transferset(transferset_samples, budget=params['budget'], transform=transform)
    transferset_fn = partial(
        samples_to_transferset,
        samples=transferset_samples,
        transform=transform,
    )

    # ----------- Set up model
    model_name = params['model_arch']
    # model = model_utils.get_net(model_name, n_output_classes=num_classes, pretrained=pretrained)
    model = zoo.get_net(model_name,
                        modelfamily,
                        False,
                        num_classes=num_classes)
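    # NOTE: the model instantiated above is discarded; it is replaced just below
    # by a network whose weights are transferred from a TensorFlow model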

    if params['model_arch'] == "resnet10":
        model = get_transferred_pytorch_model_resnet10()
    elif params['model_arch'] == "resnet18":
        model = get_transferred_pytorch_model_resnet18(testset)
    else:
        raise NotImplementedError
    model = model.cuda()

    train_fn = partial(
        train_model,
        epochs=10,
        batch_size=256,
        log_interval=50,
        lr=5e-3,
        momentum=0.5,
        lr_step=30,
        lr_gamma=0.1,
    )

    # Protect all convs
    test_prune_allconv_ratio_subset(model=model,
                                    train_fn=train_fn,
                                    transferset_fn=transferset_fn,
                                    testset=testset,
                                    out_path=output_dir,
                                    **params)
Example #9
def main():
	parser = argparse.ArgumentParser(description='Train a model')
	# Required arguments
	parser.add_argument('dataset', metavar='DS_NAME', type=str, help='Dataset name')
	parser.add_argument('model_arch', metavar='MODEL_ARCH', type=str, help='Model name')
	# Optional arguments
	parser.add_argument('-o', '--out_path', metavar='PATH', type=str, help='Output path for model',
						default=cfg.MODEL_DIR)
	parser.add_argument('-d', '--device_id', metavar='D', type=int, help='Device id. -1 for CPU.', default=0)
	parser.add_argument('-b', '--batch-size', type=int, default=32, metavar='N',
						help='input batch size for training (default: 32)')
	parser.add_argument('--datadir', default='.data',
						help='data directory (default=.data)')
	parser.add_argument('-e', '--epochs', type=int, default=10, metavar='N',
						help='number of epochs to train (default: 10)')
	parser.add_argument('--embed-dim', type=int, default=32,
						help='embed dim. (default=32)')
	parser.add_argument('--lr', type=float, default=4.0, metavar='LR',
						help='learning rate (default: 4.0)')
	# parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
	# 					help='SGD momentum (default: 0.5)')
	parser.add_argument('--log-interval', type=int, default=100, metavar='N',
						help='how many batches to wait before logging training status')
	parser.add_argument('--resume', default=None, type=str, metavar='PATH',
						help='path to latest checkpoint (default: none)')
	parser.add_argument('--lr-step', type=int, default=1, metavar='N',
						help='Step sizes for LR')
	parser.add_argument('--lr-gamma', type=float, default=0.8, metavar='N',
						help='LR Decay Rate')
	parser.add_argument('-w', '--num_workers', metavar='N', type=int, help='# Worker threads to load data', default=10)
	parser.add_argument('--train_subset', type=int, help='Use a subset of train set', default=None)
	parser.add_argument('--pretrained', type=str, help='Use pretrained network', default=None)
	parser.add_argument('--weighted-loss', action='store_true', help='Use a weighted loss', default=None)

	args = parser.parse_args()
	params = vars(args)

	# torch.manual_seed(cfg.DEFAULT_SEED)
	# extract parameter arguments into variables
	embed_dim = params['embed_dim']
	dataset_name = params['dataset']
	datadir = params['datadir']
	out_path = params['out_path']
	batch_size = params['batch_size']
	lr = params['lr']
	lr_gamma = params['lr_gamma']
	num_workers = params['num_workers']
	num_epochs = params['epochs']
	model_name = params['model_arch']
	pretrained = params['pretrained']

	if params['device_id'] >= 0:
		os.environ["CUDA_VISIBLE_DEVICES"] = str(params['device_id'])
		device = torch.device('cuda')
	else:
		device = torch.device('cpu')

	# Currently supports only the torchtext datasets
	valid_datasets = list(text_classification.DATASETS.keys())
	if dataset_name not in valid_datasets:
		raise ValueError('Dataset not found. Valid arguments = {}'.format(valid_datasets))

	# Currently, we will support classification tasks only. Sentiment task is the next one.
	# See /knockoff/datasets/__init__.py for mapping
	modelfamily = datasets.dataset_to_modelfamily[dataset_name]  # e.g. 'classification'
	metadata = datasets.dataset_metadata[dataset_name]  # Relevant parameters for the task. e.g. 'ngram'

	ngrams = metadata['ngram']

	# If the dataset does not exist locally, download and cache it
	# (use the --datadir argument consistently instead of the hard-coded '.data')
	dataset_dir = os.path.join(datadir, dataset_name.lower() + '_csv')
	train_data_path = os.path.join(dataset_dir, dataset_name + "_ngrams_{}_train.data".format(ngrams))
	test_data_path = os.path.join(dataset_dir, dataset_name + "_ngrams_{}_test.data".format(ngrams))
	if not (os.path.exists(train_data_path) and os.path.exists(test_data_path)):
		if not os.path.exists(datadir):
			print("Creating directory {}".format(datadir))
			os.makedirs(datadir)
		trainset, testset = text_classification.DATASETS[dataset_name](root=datadir, ngrams=ngrams)
		print("Saving train data to {}".format(train_data_path))
		torch.save(trainset, train_data_path)
		print("Saving test data to {}".format(test_data_path))
		torch.save(testset, test_data_path)
	else:
		print("Loading train data from {}".format(train_data_path))
		trainset = torch.load(train_data_path)
		print("Loading test data from {}".format(test_data_path))
		testset = torch.load(test_data_path)

	# Extract variables for model from the dataset
	vocab_size = len(trainset.get_vocab())
	params['num_classes'] = len(trainset.get_labels())
	num_classes = params['num_classes']

	model = zoo.get_net(model_name, modelfamily, pretrained, vocab_size=vocab_size, embed_dim=embed_dim,
						num_class=num_classes)
	model = model.to(device)
	model_utils.train_and_valid(trainset, testset, model, model_name, modelfamily, out_path, batch_size, lr, lr_gamma,
								num_workers, device=device, num_epochs=num_epochs)

	# Store arguments in json file. Maybe for the transfer set step?
	params['created_on'] = str(datetime.now())
	params_out_path = osp.join(out_path, 'params.json')
	with open(params_out_path, 'w') as jf:
		json.dump(params, jf, indent=True)
Example #10
def main():
    parser = argparse.ArgumentParser(description='Train a model')
    # Required arguments
    parser.add_argument('model_dir',
                        metavar='DIR',
                        type=str,
                        help='Directory containing transferset.pickle')
    parser.add_argument('model_arch',
                        metavar='MODEL_ARCH',
                        type=str,
                        help='Model name')
    parser.add_argument('testdataset',
                        metavar='DS_NAME',
                        type=str,
                        help='Name of test')
    parser.add_argument(
        '--budgets',
        metavar='B',
        type=str,
        help=
        'Comma separated values of budgets. Knockoffs will be trained for each budget.'
    )
    # Optional arguments
    parser.add_argument('-d',
                        '--device_id',
                        metavar='D',
                        type=int,
                        help='Device id. -1 for CPU.',
                        default=0)
    parser.add_argument('-b',
                        '--batch-size',
                        type=int,
                        default=64,
                        metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('-e',
                        '--epochs',
                        type=int,
                        default=100,
                        metavar='N',
                        help='number of epochs to train (default: 100)')
    parser.add_argument('--lr',
                        type=float,
                        default=0.01,
                        metavar='LR',
                        help='learning rate (default: 0.01)')
    parser.add_argument('--momentum',
                        type=float,
                        default=0.5,
                        metavar='M',
                        help='SGD momentum (default: 0.5)')
    parser.add_argument(
        '--log-interval',
        type=int,
        default=50,
        metavar='N',
        help='how many batches to wait before logging training status')
    parser.add_argument('--resume',
                        default=None,
                        type=str,
                        metavar='PATH',
                        help='path to latest checkpoint (default: none)')
    parser.add_argument('--lr-step',
                        type=int,
                        default=60,
                        metavar='N',
                        help='Step sizes for LR')
    parser.add_argument('--lr-gamma',
                        type=float,
                        default=0.1,
                        metavar='N',
                        help='LR Decay Rate')
    parser.add_argument('-w',
                        '--num_workers',
                        metavar='N',
                        type=int,
                        help='# Worker threads to load data',
                        default=10)
    parser.add_argument('--pretrained',
                        type=str,
                        help='Use pretrained network',
                        default=None)
    parser.add_argument('--weighted-loss',
                        action='store_true',
                        help='Use a weighted loss',
                        default=False)
    args = parser.parse_args()
    params = vars(args)

    torch.manual_seed(cfg.DEFAULT_SEED)
    if params['device_id'] >= 0:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(params['device_id'])
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    model_dir = params['model_dir']

    # ----------- Set up transferset
    transferset_path = osp.join(model_dir, 'transferset.pickle')
    with open(transferset_path, 'rb') as rf:
        transferset_samples = pickle.load(rf)
    num_classes = transferset_samples[0][1].size(0)
    print('=> found transfer set with {} samples, {} classes'.format(
        len(transferset_samples), num_classes))

    # ----------- Set up testset
    dataset_name = params['testdataset']
    valid_datasets = datasets.__dict__.keys()
    if dataset_name not in valid_datasets:
        raise ValueError(
            'Dataset not found. Valid arguments = {}'.format(valid_datasets))
    modelfamily = datasets.dataset_to_modelfamily[dataset_name]
    transform = datasets.modelfamily_to_transforms[modelfamily]['test']
    dataset = datasets.__dict__[dataset_name]
    testset = dataset(train=False, transform=transform)
    if len(testset.classes) != num_classes:
        raise ValueError(
            '# Transfer classes ({}) != # Testset classes ({})'.format(
                num_classes, len(testset.classes)))

    # ----------- Set up model
    model_name = params['model_arch']
    pretrained = params['pretrained']
    # model = model_utils.get_net(model_name, n_output_classes=num_classes, pretrained=pretrained)
    model = zoo.get_net(model_name,
                        modelfamily,
                        pretrained,
                        num_classes=num_classes)
    model = model.to(device)

    # ----------- Train
    budgets = [int(b) for b in params['budgets'].split(',')]

    for b in budgets:
        np.random.seed(cfg.DEFAULT_SEED)
        torch.manual_seed(cfg.DEFAULT_SEED)
        torch.cuda.manual_seed(cfg.DEFAULT_SEED)

        transferset = samples_to_transferset(transferset_samples,
                                             budget=b,
                                             transform=transform)
        print()
        print('=> Training at budget = {}'.format(len(transferset)))

        checkpoint_suffix = '.{}'.format(b)
        criterion_train = model_utils.soft_cross_entropy
        model_utils.train_model(model,
                                transferset,
                                model_dir,
                                testset=testset,
                                criterion_train=criterion_train,
                                checkpoint_suffix=checkpoint_suffix,
                                device=device,
                                **params)

    # Store arguments
    params['created_on'] = str(datetime.now())
    params_out_path = osp.join(model_dir, 'params_train.json')
    with open(params_out_path, 'w') as jf:
        json.dump(params, jf, indent=True)
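samples_to_transferset appears here and in the pruning scripts above but is not shown. A minimal sketch consistent with its call sites, a budget-limited dataset over (x, y) pairs with an optional transform; the class name and internals are assumptions:

from torch.utils.data import Dataset

class _TransferSet(Dataset):
    """Wraps (x, y) pairs; applies a transform to x on access."""
    def __init__(self, samples, transform=None):
        self.samples = samples
        self.transform = transform

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        x, y = self.samples[idx]
        if self.transform is not None:
            x = self.transform(x)
        return x, y

def samples_to_transferset(samples, budget=None, transform=None):
    budget = len(samples) if budget is None else budget
    return _TransferSet(samples[:budget], transform=transform)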
Example #11
def main():
    parser = argparse.ArgumentParser(
        description='Construct adaptive transfer set')
    parser.add_argument(
        'victim_model_dir',
        metavar='PATH',
        type=str,
        help=
        'Path to victim model. Should contain files "model_best.pth.tar" and "params.json"'
    )
    parser.add_argument('--out_dir',
                        metavar='PATH',
                        type=str,
                        help='Destination directory to store transfer set',
                        required=True)
    parser.add_argument('--budget',
                        metavar='N',
                        type=int,
                        help='Size of transfer set to construct',
                        required=True)
    parser.add_argument('model_arch',
                        metavar='MODEL_ARCH',
                        type=str,
                        help='Model name')
    parser.add_argument('testdataset',
                        metavar='DS_NAME',
                        type=str,
                        help='Name of test')

    parser.add_argument('--queryset',
                        metavar='TYPE',
                        type=str,
                        help='Adversary\'s dataset (P_A(X))',
                        required=True)
    parser.add_argument('--batch_size',
                        metavar='TYPE',
                        type=int,
                        help='Batch size of queries',
                        default=8)
    # parser.add_argument('--topk', metavar='N', type=int, help='Use posteriors only from topk classes',
    #                     default=None)
    # parser.add_argument('--rounding', metavar='N', type=int, help='Round posteriors to these many decimals',
    #                     default=None)
    # parser.add_argument('--tau_data', metavar='N', type=float, help='Frac. of data to sample from Adv data',
    #                     default=1.0)
    # parser.add_argument('--tau_classes', metavar='N', type=float, help='Frac. of classes to sample from Adv data',
    #                     default=1.0)
    # ----------- Other params
    parser.add_argument('-d',
                        '--device_id',
                        metavar='D',
                        type=int,
                        help='Device id',
                        default=0)
    parser.add_argument('--pretrained',
                        type=str,
                        help='Use pretrained network',
                        default=None)
    parser.add_argument('--defense',
                        type=str,
                        help='Defense strategy used by victim side',
                        default=None)

    args = parser.parse_args()
    params = vars(args)

    out_path = params['out_dir']
    knockoff_utils.create_dir(out_path + "-adaptive")

    defense = params['defense']
    if defense:
        transfer_out_path = osp.join(out_path + "-adaptive",
                                     'transferset-' + defense + '.pickle')
    else:
        transfer_out_path = osp.join(out_path + "-adaptive",
                                     'transferset.pickle')

    torch.manual_seed(cfg.DEFAULT_SEED)
    if params['device_id'] >= 0:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(params['device_id'])
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    # ----------- Set up queryset
    queryset_name = params['queryset']
    valid_datasets = datasets.__dict__.keys()
    if queryset_name not in valid_datasets:
        raise ValueError(
            'Dataset not found. Valid arguments = {}'.format(valid_datasets))
    modelfamily = datasets.dataset_to_modelfamily[queryset_name]
    transform = datasets.modelfamily_to_transforms[modelfamily]['test']
    queryset = datasets.__dict__[queryset_name](train=True,
                                                transform=transform)
    queryset.targets = [
        queryset.samples[idx][1] for idx in range(len(queryset))
    ]
    num_classes = len(queryset.classes)
    # ----------- Initialize blackbox
    blackbox_dir = params['victim_model_dir']
    blackbox = Blackbox.from_modeldir(blackbox_dir, defense, device)

    # ----------- Initialize Knockoff Nets
    model_name = params['model_arch']
    pretrained = params['pretrained']
    # model = model_utils.get_net(model_name, n_output_classes=num_classes, pretrained=pretrained)
    model = zoo.get_net(model_name,
                        modelfamily,
                        pretrained,
                        num_classes=num_classes)
    model = model.to(device)

    adversary = AdaptiveAdversary(queryset,
                                  blackbox,
                                  model,
                                  device,
                                  reward='all')

    print('=> constructing transfer set...')
    transferset = adversary.get_transferset(params['budget'])

    with open(transfer_out_path, 'wb') as wf:
        pickle.dump(transferset, wf)
    print('=> transfer set ({} samples) written to: {}'.format(
        len(transferset), transfer_out_path))

    # Store arguments
    params['created_on'] = str(datetime.now())
    params_out_path = osp.join(out_path + "-adaptive", 'params_transfer.json')
    with open(params_out_path, 'w') as jf:
        json.dump(params, jf, indent=True)
Example #12
def main():
    parser = argparse.ArgumentParser(description='Train a model')
    # Required arguments
    parser.add_argument('model_dir', metavar='DIR', type=str, help='Directory containing transferset.pickle')
    parser.add_argument('model_arch', metavar='MODEL_ARCH', type=str, help='Model name')
    parser.add_argument('testdataset', metavar='DS_NAME', type=str, help='Name of test')
    parser.add_argument('-o', '--output', type=str, help="Output dir")
    # Optional arguments
    parser.add_argument('-b', '--batch-size', type=int, default=1024, metavar='N',
                        help='input batch size for training (default: 1024)')
    parser.add_argument('--resume', default=None, type=str, metavar='PATH',
                        help='path to latest checkpoint (default: none)')
    parser.add_argument('--device_id', default=0, type=int)
    parser.add_argument('-w', '--num_workers', metavar='N', type=int, help='# Worker threads to load data', default=10)
    args = parser.parse_args()
    params = vars(args)

    random.seed(cfg.DEFAULT_SEED)
    np.random.seed(cfg.DEFAULT_SEED)
    torch.cuda.manual_seed(cfg.DEFAULT_SEED)
    torch.manual_seed(cfg.DEFAULT_SEED)
    if params['device_id'] >= 0:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(params['device_id'])
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    model_dir = params['model_dir']
    output_dir = params['output']
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # num_classes is known a priori only for CIFAR10; otherwise take it from the test set below
    num_classes = 10 if params['testdataset'] == 'CIFAR10' else None
    # ----------- Set up testset
    dataset_name = params['testdataset']
    valid_datasets = datasets.__dict__.keys()
    if dataset_name not in valid_datasets:
        raise ValueError('Dataset not found. Valid arguments = {}'.format(valid_datasets))
    modelfamily = datasets.dataset_to_modelfamily[dataset_name]
    transform = datasets.modelfamily_to_transforms[modelfamily]['test']
    dataset = datasets.__dict__[dataset_name]
    testset = dataset(train=False, transform=transform)
    if num_classes is None:
        num_classes = len(testset.classes)
    elif len(testset.classes) != num_classes:
        raise ValueError('# Transfer classes ({}) != # Testset classes ({})'.format(num_classes, len(testset.classes)))

    # ----------- Set up model
    model_name = params['model_arch']
    # model = model_utils.get_net(model_name, n_output_classes=num_classes, pretrained=pretrained)
    model = zoo.get_net(model_name, modelfamily, False, num_classes=num_classes)
    # model = model.to(device)
    # model = protect_model(model)    
    
    if params['model_arch'] == "resnet10":
        model = get_transferred_pytorch_model_resnet10()
    elif params['model_arch'] == "resnet18":
        model = get_transferred_pytorch_model_resnet18(testset)
    else:
        raise NotImplementedError
    model = model.cuda()
    
    # Test clean tf model accuracy
    # model = load_class_avg(model, arch=params['model_arch'])
    # test(
    #     model=model,
    #     testset=testset,
    #     out_path=output_dir,
    #     **params,
    # )
    # st()
    
    # Test accuracy of each conv
    # for class_id in ["all"] + list(range(10)):
    #     testset = dataset(train=False, transform=transform)
    #     test_prune_per_conv(
    #         model=model, 
    #         testset=testset, 
    #         out_path=output_dir, 
    #         class_id=class_id,
    #         **params
    #     )
    
    # Test allconv pruned model om the whole dataset
    # test_prune_allconv_ratio_all(
    #     model=model, 
    #     testset=testset, 
    #     out_path=output_dir, 
    #     **params
    # )
    
    test_prune_allconv_ratio_subsetacc(
        model=model, 
        testset=testset, 
        out_path=output_dir, 
        **params
    )
Example #13
    def __init__(self, blackbox, budget, model_adv_name, model_adv_pretrained, modelfamily, seedset, testset, device,
                 out_dir, batch_size=cfg.DEFAULT_BATCH_SIZE, train_epochs=20, kappa=400, tau=None, rho=6, sigma=-1,
                 query_batch_size=1, aug_strategy='jbda', useprobs=True, final_train_epochs=100):
        self.blackbox = blackbox
        self.budget = budget
        self.model_adv_name = model_adv_name
        self.model_adv_pretrained = model_adv_pretrained
        self.model_adv = None
        self.modelfamily = modelfamily
        self.seedset = seedset
        self.testset = testset
        self.batch_size = batch_size
        self.query_batch_size = query_batch_size
        self.testloader = DataLoader(self.testset, batch_size=self.batch_size, pin_memory=True)
        self.train_epochs = train_epochs
        self.final_train_epochs = final_train_epochs
        self.kappa = kappa
        self.tau = tau
        self.rho = rho
        self.sigma = sigma
        self.device = device
        self.out_dir = out_dir
        self.num_classes = len(self.testset.classes)
        assert (aug_strategy in ['jbda', 'jbself']) or 'jbtop' in aug_strategy
        self.aug_strategy = aug_strategy
        self.topk = 0
        if 'jbtop' in aug_strategy:
            # extract k from "jbtop<k>"
            self.topk = int(aug_strategy.replace('jbtop', ''))

        self.accuracies = []  # Track test accuracies over time
        self.useprobs = useprobs

        # -------------------------- Initialize seed data
        print('=> Obtaining predictions over {} seed samples using strategy {}'.format(len(self.seedset),
                                                                                       self.aug_strategy))
        Dx = torch.cat([self.seedset[i][0].unsqueeze(0) for i in range(len(self.seedset))])
        Dy = []

        # Populate Dy
        with torch.no_grad():
            for inputs, in DataLoader(TensorDataset(Dx), batch_size=self.query_batch_size):
                inputs = inputs.to(self.device)
                outputs = blackbox(inputs).cpu()
                if not self.useprobs:
                    labels = torch.argmax(outputs, dim=1)
                    labels_onehot = make_one_hot(labels, outputs.shape[1])
                    outputs = labels_onehot
                Dy.append(outputs)
        # Dy = torch.tensor(Dy)
        Dy = torch.cat(Dy)

        # TensorDataset D
        self.D = TensorDataset(Dx, Dy)

        # Warm-up training run: pre-allocates ("blocks") the GPU memory that training will need later on
        model_adv = zoo.get_net(self.model_adv_name, self.modelfamily, self.model_adv_pretrained,
                                num_classes=self.num_classes)
        model_adv = model_adv.to(self.device)
        model_adv = model_utils.train_model(model_adv, self.D, self.out_dir, num_workers=10,
                                            checkpoint_suffix='.{}'.format(self.blackbox.call_count),
                                            device=self.device, epochs=1,
                                            log_interval=500, lr=0.01, momentum=0.9, batch_size=self.batch_size,
                                            lr_gamma=0.1, testset=self.testset,
                                            criterion_train=model_utils.soft_cross_entropy)
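make_one_hot (used above when useprobs=False) is not defined in this snippet; a minimal sketch consistent with its usage:

import torch

def make_one_hot(labels, K):
    """Convert a (N,) tensor of class indices into (N, K) one-hot vectors."""
    one_hot = torch.zeros(labels.shape[0], K, dtype=torch.float)
    one_hot[torch.arange(labels.shape[0]), labels] = 1.
    return one_hot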
Example #14
def get_transferred_pytorch_model_resnet10():
    model = zoo.get_net("resnet10", "cifar", False, num_classes=10)
    model = load_tf_model_resnet10(model)
    return model
def main():
    parser = argparse.ArgumentParser(description='Train a model')
    # Required arguments
    parser.add_argument('policy',
                        metavar='PI',
                        type=str,
                        help='Policy to use while training',
                        choices=['random', 'adaptive', 'jacobian'])
    parser.add_argument('model_dir',
                        metavar='DIR',
                        type=str,
                        help='Directory containing transferset.pickle')
    parser.add_argument('model_arch',
                        metavar='MODEL_ARCH',
                        type=str,
                        help='Model name')
    parser.add_argument('testdataset',
                        metavar='DS_NAME',
                        type=str,
                        help='Name of test dataset')
    parser.add_argument(
        '--budgets',
        metavar='B',
        type=str,
        help=
        'Comma separated values of budgets. Knockoffs will be trained for each budget.'
    )
    # Optional arguments
    parser.add_argument('-d',
                        '--device_id',
                        metavar='D',
                        type=int,
                        help='Device id. -1 for CPU.',
                        default=0)
    parser.add_argument('-b',
                        '--batch-size',
                        type=int,
                        default=64,
                        metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('-e',
                        '--epochs',
                        type=int,
                        default=100,
                        metavar='N',
                        help='number of epochs to train (default: 100)')
    parser.add_argument('--lr',
                        type=float,
                        default=0.1,
                        metavar='LR',
                        help='learning rate (default: 0.1)')
    parser.add_argument('--momentum',
                        type=float,
                        default=0.5,
                        metavar='M',
                        help='SGD momentum (default: 0.5)')
    parser.add_argument(
        '--log-interval',
        type=int,
        default=50,
        metavar='N',
        help='how many batches to wait before logging training status')
    parser.add_argument('--resume',
                        default=None,
                        type=str,
                        metavar='PATH',
                        help='path to latest checkpoint (default: none)')
    parser.add_argument('--lr-step',
                        type=int,
                        default=60,
                        metavar='N',
                        help='Step sizes for LR')
    parser.add_argument('--lr-gamma',
                        type=float,
                        default=0.1,
                        metavar='N',
                        help='LR Decay Rate')
    parser.add_argument('-w',
                        '--num_workers',
                        metavar='N',
                        type=int,
                        help='# Worker threads to load data',
                        default=10)
    parser.add_argument('--pretrained',
                        type=str,
                        help='Use pretrained network',
                        default=None)
    parser.add_argument('--weighted-loss',
                        action='store_true',
                        help='Use a weighted loss',
                        default=False)
    # Attacker's defense
    parser.add_argument('--argmaxed',
                        action='store_true',
                        help='Only consider argmax labels',
                        default=False)
    parser.add_argument('--optimizer_choice',
                        type=str,
                        help='Optimizer',
                        default='sgdm',
                        choices=('sgd', 'sgdm', 'adam', 'adagrad'))
    parser.add_argument('--defense',
                        type=str,
                        help='Defense strategy used by victim side',
                        default=None)

    args = parser.parse_args()
    params = vars(args)

    torch.manual_seed(cfg.DEFAULT_SEED)
    if params['device_id'] >= 0:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(params['device_id'])
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    model_dir = params['model_dir'] + "-" + params["policy"]

    # ----------- Set up transferset
    defense = params['defense']
    if defense:
        transferset_path = osp.join(model_dir,
                                    'transferset-' + defense + '.pickle')
    else:
        transferset_path = osp.join(model_dir, 'transferset.pickle')
    with open(transferset_path, 'rb') as rf:
        transferset_samples = pickle.load(rf)
    num_classes = transferset_samples[0][1].size(0)
    print('=> found transfer set with {} samples, {} classes'.format(
        len(transferset_samples), num_classes))

    # ----------- Clean up transfer (if necessary)
    if params['argmaxed']:
        new_transferset_samples = []
        print('=> Using argmax labels (instead of posterior probabilities)')
        for i in range(len(transferset_samples)):
            x_i, y_i = transferset_samples[i]
            argmax_k = y_i.argmax()
            y_i_1hot = torch.zeros_like(y_i)
            y_i_1hot[argmax_k] = 1.
            new_transferset_samples.append((x_i, y_i_1hot))
        transferset_samples = new_transferset_samples

    # ----------- Set up testset
    dataset_name = params['testdataset']
    valid_datasets = datasets.__dict__.keys()
    if dataset_name not in valid_datasets:
        raise ValueError(
            'Dataset not found. Valid arguments = {}'.format(valid_datasets))
    modelfamily = datasets.dataset_to_modelfamily[dataset_name]
    transform = datasets.modelfamily_to_transforms[modelfamily]['test']
    dataset = datasets.__dict__[dataset_name]
    testset = dataset(train=False, transform=transform)
    if len(testset.classes) != num_classes:
        raise ValueError(
            '# Transfer classes ({}) != # Testset classes ({})'.format(
                num_classes, len(testset.classes)))

    # ----------- Set up model
    model_name = params['model_arch']
    pretrained = params['pretrained']
    # model = model_utils.get_net(model_name, n_output_classes=num_classes, pretrained=pretrained)
    model = zoo.get_net(model_name,
                        modelfamily,
                        pretrained,
                        num_classes=num_classes)
    model = model.to(device)

    # ----------- Train
    budgets = [int(b) for b in params['budgets'].split(',')]

    for b in budgets:
        np.random.seed(cfg.DEFAULT_SEED)
        torch.manual_seed(cfg.DEFAULT_SEED)
        torch.cuda.manual_seed(cfg.DEFAULT_SEED)

        transferset = samples_to_transferset(transferset_samples,
                                             budget=b,
                                             transform=transform)
        # code.interact(local=dict(globals(), **locals()))
        print()
        print('=> Training at budget = {}'.format(len(transferset)))

        optimizer = get_optimizer(model.parameters(),
                                  params['optimizer_choice'], **params)
        print(params)

        checkpoint_suffix = '.{}'.format(b)
        criterion_train = model_utils.soft_cross_entropy

        model_utils.train_model(model,
                                transferset,
                                model_dir,
                                testset=testset,
                                criterion_train=criterion_train,
                                checkpoint_suffix=checkpoint_suffix,
                                device=device,
                                optimizer=optimizer,
                                **params)

    # Store arguments
    params['created_on'] = str(datetime.now())
    params_out_path = osp.join(model_dir, 'params_train.json')
    with open(params_out_path, 'w') as jf:
        json.dump(params, jf, indent=True)
Example #16
def get_transferred_pytorch_model_resnet18(testset):
    model = zoo.get_net("resnet18", "cifar", False, num_classes=10)
    model = load_tf_model_resnet18(model, testset)
    return model
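# Note: Example #17 below wraps queried tensors in ImageTensorSet, which is
# not defined there. A hypothetical reconstruction from its usage (built from
# an (X, Y) tuple and consumed by model_utils.train_model):
from torch.utils.data import Dataset

class ImageTensorSet(Dataset):
    # Wraps pre-queried inputs X and blackbox outputs Y as (image, soft-label) pairs
    def __init__(self, data):
        self.X, self.Y = data

    def __len__(self):
        return self.X.size(0)

    def __getitem__(self, idx):
        return self.X[idx], self.Y[idx]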
Example #17
def main():
    parser = argparse.ArgumentParser(description='Train a model')
    # Required arguments
    parser.add_argument('model_dir',
                        metavar='DIR',
                        type=str,
                        help='Directory containing transferset.pickle')
    parser.add_argument('model_arch',
                        metavar='MODEL_ARCH',
                        type=str,
                        help='Model name')
    parser.add_argument('testdataset',
                        metavar='DS_NAME',
                        type=str,
                        help='Name of test dataset')
    parser.add_argument('--budgets',
                        metavar='B',
                        type=int,
                        help='Query budget the knockoff will be trained with.')
    # Optional arguments
    parser.add_argument('-d',
                        '--device_id',
                        metavar='D',
                        type=int,
                        help='Device id. -1 for CPU.',
                        default=0)
    parser.add_argument('-b',
                        '--batch-size',
                        type=int,
                        default=64,
                        metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('-e',
                        '--epochs',
                        type=int,
                        default=10,
                        metavar='N',
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--lr',
                        type=float,
                        default=0.01,
                        metavar='LR',
                        help='learning rate (default: 0.01)')
    parser.add_argument('--momentum',
                        type=float,
                        default=0.5,
                        metavar='M',
                        help='SGD momentum (default: 0.5)')
    parser.add_argument(
        '--log-interval',
        type=int,
        default=50,
        metavar='N',
        help='how many batches to wait before logging training status')
    parser.add_argument('--resume',
                        default=None,
                        type=str,
                        metavar='PATH',
                        help='path to latest checkpoint (default: none)')
    parser.add_argument('--lr-step',
                        type=int,
                        default=60,
                        metavar='N',
                        help='Step sizes for LR')
    parser.add_argument('--lr-gamma',
                        type=float,
                        default=0.1,
                        metavar='N',
                        help='LR Decay Rate')
    parser.add_argument('-w',
                        '--num_workers',
                        metavar='N',
                        type=int,
                        help='# Worker threads to load data',
                        default=10)
    parser.add_argument('--pretrained',
                        type=str,
                        help='Use pretrained network',
                        default=None)
    parser.add_argument('--weighted-loss',
                        action='store_true',
                        help='Use a weighted loss',
                        default=False)

    # RL arguments
    parser.add_argument('--traj_length',
                        metavar='N',
                        type=int,
                        help='# steps in one trajectory',
                        default=10)
    parser.add_argument('--num_each_class',
                        metavar='N',
                        type=int,
                        help='# samples in each class',
                        default=1)
    parser.add_argument('--n_iter',
                        metavar='N',
                        type=int,
                        help='# iterations of RL training',
                        default=10)
    parser.add_argument('--n_traj_each_iter',
                        metavar='N',
                        type=int,
                        help='# trajectories / iter',
                        default=10)
    parser.add_argument('--queryset',
                        metavar='DS_NAME',
                        type=str,
                        help='Name of query dataset')
    parser.add_argument('--victim_model_dir',
                        default=None,
                        type=str,
                        metavar='PATH',
                        help='Path to victim model directory (default: none)')

    parser.add_argument('--n_layers',
                        metavar='N',
                        type=int,
                        help='# layers in policy',
                        default=4)
    parser.add_argument('--size',
                        metavar='N',
                        type=int,
                        help='size of layer in policy',
                        default=64)
    parser.add_argument('--policy_lr',
                        type=float,
                        default=1e-4,
                        metavar='N',
                        help='Policy learning rate')
    parser.add_argument('--num_agent_train_steps_per_iter',
                        metavar='N',
                        type=int,
                        help='num_agent_train_steps_per_iter',
                        default=10)
    parser.add_argument('--agent_train_batch_size',
                        metavar='N',
                        type=int,
                        help='Batch size for agent training steps',
                        default=990)
    parser.add_argument('--policy_gamma',
                        type=float,
                        default=0.9,
                        metavar='N',
                        help='reward discounting')
    parser.add_argument('--eps_random',
                        type=float,
                        default=-1,
                        metavar='N',
                        help='eps random exploration')
    parser.add_argument('--nn_baseline',
                        action='store_true',
                        help='Use nn baseline',
                        default=False)

    # Attacker's defense
    parser.add_argument('--argmaxed',
                        action='store_true',
                        help='Only consider argmax labels',
                        default=False)
    parser.add_argument('--optimizer_choice',
                        type=str,
                        help='Optimizer',
                        default='sgdm',
                        choices=('sgd', 'sgdm', 'adam', 'adagrad'))
    args = parser.parse_args()
    params = vars(args)

    torch.manual_seed(cfg.DEFAULT_SEED)
    if params['device_id'] >= 0:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(params['device_id'])
        device = torch.device('cuda')
        ptu.init_gpu()
    else:
        device = torch.device('cpu')

    model_dir = params['model_dir']

    # ----------- Set up testset
    dataset_name = params['testdataset']
    valid_datasets = datasets.__dict__.keys()
    if dataset_name not in valid_datasets:
        raise ValueError(
            'Dataset not found. Valid arguments = {}'.format(valid_datasets))
    modelfamily = datasets.dataset_to_modelfamily[dataset_name]
    transform = datasets.modelfamily_to_transforms[modelfamily]['test']
    dataset = datasets.__dict__[dataset_name]
    testset = dataset(train=False, transform=transform)
    #if len(testset.classes) != num_classes:
    #    raise ValueError('# Transfer classes ({}) != # Testset classes ({})'.format(num_classes, len(testset.classes)))

    # ----------- Set up queryset
    queryset_name = params['queryset']
    valid_datasets = datasets.__dict__.keys()
    if queryset_name not in valid_datasets:
        raise ValueError(
            'Dataset not found. Valid arguments = {}'.format(valid_datasets))
    modelfamily = datasets.dataset_to_modelfamily[queryset_name]
    transform = datasets.modelfamily_to_transforms[modelfamily]['train']
    try:
        queryset = datasets.__dict__[queryset_name](train=True,
                                                    transform=transform)
    except TypeError:
        # Some dataset constructors take `split=` instead of `train=`
        queryset = datasets.__dict__[queryset_name](split="train",
                                                    transform=transform)

    num_classes = len(queryset.classes)

    # ----------- Initialize blackbox
    blackbox_dir = params['victim_model_dir']
    blackbox = Blackbox.from_modeldir(blackbox_dir, device)

    # ----------- Set up adversary model
    model_name = params['model_arch']
    pretrained = params['pretrained']
    # model = model_utils.get_net(model_name, n_output_classes=num_classes, pretrained=pretrained)
    adv_model = zoo.get_net(model_name,
                            modelfamily,
                            pretrained,
                            num_classes=10)
    adv_model = adv_model.to(device)

    # ----------- Initialize adversary
    num_each_class = params['num_each_class']
    agent_params = {
        "ac_dim": num_classes,
        "ob_dim": len(testset.classes),
        "n_layers": params["n_layers"],
        "size": params["size"],
        "discrete": True,
        "learning_rate": params["policy_lr"],
        "num_agent_train_steps_per_iter":
        params["num_agent_train_steps_per_iter"],
        "agent_train_batch_size": params["agent_train_batch_size"],
        "gamma": params["policy_gamma"],
        "reward_to_go": True,
        "nn_baseline": params["nn_baseline"],
        "standardize_advantages": True,
        "eps_random": params["eps_random"]
    }
    adversary = PGAdversary(queryset, num_each_class, agent_params)

    # ----------- Set up transferset
    def collect_training_trajectories(length, n_traj=10):
        nonlocal avg_rewards, avg_components
        paths = []
        mean_rew = 0
        mean_cert = mean_L = mean_E = mean_div = 0
        X_paths, Y_paths = [], []
        for _ in range(n_traj):
            obs, acs, rewards, next_obs = [], [], [], []
            r_certs, r_Ls, r_Es, r_divs = [], [], [], []
            X_path, Y_path = [], []
            X, actions = adversary.init_sampling()
            X_path.append(X)
            ob = blackbox(X.to(device)).cpu()
            Y_path.append(ob)
            ob = ob.numpy()

            for t in range(length - 1):
                with torch.no_grad():
                    # Observe and react
                    obs.append(ob)
                    X_new, actions = adversary.sample(ob)
                    X_path.append(X_new)
                    acs.append(actions)

                    # Env gives feedback, which is a new observation
                    X_new = X_new.to(device)
                    ob = blackbox(X_new).cpu()
                    Y_path.append(ob)
                    ob = ob.numpy()
                    next_obs.append(ob)
                    Y_adv = adv_model(X_new)
                    Y_adv = F.softmax(Y_adv, dim=1).cpu().numpy()
                reward, r_cert, r_L, r_E, r_div = adversary.agent.calculate_reward(
                    ob, np.concatenate(acs), Y_adv)
                rewards.append(reward)
                r_certs.append(r_cert)
                r_Ls.append(r_L)
                r_Es.append(r_E)
                r_divs.append(r_div)

            obs = np.concatenate(obs)
            acs = np.concatenate(acs)

            rewards = np.concatenate(rewards)
            mean_rew += np.mean(rewards)

            mean_cert += np.mean(np.concatenate(r_certs))
            mean_L += np.mean(np.concatenate(r_Ls))
            mean_E += np.mean(np.array(r_Es))
            mean_div += np.mean(np.array(r_divs))

            next_obs = np.concatenate(next_obs)
            path = {
                "observation": obs,
                "action": acs,
                "reward": rewards,
                "next_observation": next_obs
            }
            paths.append(path)
            X_paths.append(torch.cat(X_path))
            Y_paths.append(torch.cat(Y_path))

        print(f"==> Avg reward: {mean_rew / n_traj}")
        avg_rewards.append(mean_rew / n_traj)
        avg_components["avg_cert"].append(mean_cert / n_traj)
        avg_components["avg_L"].append(mean_L / n_traj)
        avg_components["avg_E"].append(mean_E / n_traj)
        avg_components["avg_div"].append(mean_div / n_traj)
        return torch.cat(X_paths), torch.cat(Y_paths), paths

    traj_length = params['traj_length']
    num_each_class = params['num_each_class']
    n_iter = params['n_iter']
    X, Y = None, None
    budgets = params['budgets']
    n_traj = params['n_traj_each_iter']
    criterion_train = model_utils.soft_cross_entropy
    if traj_length > 0:
        n_iter = budgets // (traj_length * n_traj)

    print(f"==> Budget = {n_iter} x {traj_length} x {n_traj}")
    best_test_acc = []
    best_acc = -1
    avg_rewards = []
    avg_components = collections.defaultdict(list)
    for it in range(1, n_iter + 1):
        # n_iter * traj_length * n_traj = total query budget
        print(f"==> Iteration: {it}/{n_iter}")
        X_path, Y_path, paths = collect_training_trajectories(traj_length,
                                                              n_traj=n_traj)

        adversary.add_to_replay_buffer(paths)

        adversary.train_agent()

        if X is None:
            X, Y = X_path, Y_path
        else:
            X = torch.cat((X, X_path))
            Y = torch.cat((Y, Y_path))

        transferset = ImageTensorSet((X, Y))

        # ----------- Train
        #np.random.seed(cfg.DEFAULT_SEED)
        #torch.manual_seed(cfg.DEFAULT_SEED)
        #torch.cuda.manual_seed(cfg.DEFAULT_SEED)
        optimizer = get_optimizer(adv_model.parameters(),
                                  params['optimizer_choice'], **params)
        print(f"Train on {len(transferset)} samples")
        checkpoint_suffix = '.extraction'
        best_acc = model_utils.train_model(adv_model,
                                           transferset,
                                           model_dir,
                                           testset=testset,
                                           criterion_train=criterion_train,
                                           checkpoint_suffix=checkpoint_suffix,
                                           device=device,
                                           optimizer=optimizer,
                                           benchmark=best_acc,
                                           **params)
        best_test_acc.append(best_acc)
        adversary.agent.actor.save(
            osp.join(model_dir, "checkpoint.agent.state_dict"))

        # ----------- Log
        torch.save(best_test_acc, osp.join(model_dir, "best_acc.pylist"))
        torch.save(avg_rewards, osp.join(model_dir, "avg_rewards.pylist"))
        torch.save(avg_components, osp.join(model_dir,
                                            "avg_components.pydict"))
        torch.save(adversary.idx_counter,
                   osp.join(model_dir, "idx_counter.pydict"))
        torch.save(transferset, osp.join(model_dir, "transferset.pt"))

    # Store arguments
    params['created_on'] = str(datetime.now())
    params_out_path = osp.join(model_dir, 'params_train.json')
    with open(params_out_path, 'w') as jf:
        json.dump(params, jf, indent=True)

    agent_params_out_path = osp.join(model_dir, 'agent_params_train.json')
    with open(agent_params_out_path, 'w') as jf:
        json.dump(agent_params, jf, indent=True)
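# Note: both training scripts build their optimizer via get_optimizer, which
# is not included above. A minimal sketch, assuming it dispatches on the
# --optimizer_choice values ('sgd', 'sgdm', 'adam', 'adagrad') and picks up
# lr/momentum from the parsed params via **kwargs:
import torch.optim as optim

def get_optimizer(parameters, optimizer_type, lr=0.01, momentum=0.5, **kwargs):
    # Dispatch on the optimizer choices exposed by the argument parsers above
    if optimizer_type == 'sgd':
        return optim.SGD(parameters, lr=lr)
    elif optimizer_type == 'sgdm':
        return optim.SGD(parameters, lr=lr, momentum=momentum)
    elif optimizer_type == 'adam':
        return optim.Adam(parameters)
    elif optimizer_type == 'adagrad':
        return optim.Adagrad(parameters)
    else:
        raise ValueError('Unrecognized optimizer type: {}'.format(optimizer_type))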