Example #1
def save(self, filename):
    filename = name_handler(os.path.join('save', filename),
                            'csv',
                            overwrite=False)
    title = [
        'n_fold', 'k2', 'zeta', 'kappa', 'gamma', 'score', 'blk_cleans',
        'blk_advs'
    ]
    with open(filename, 'w') as file:
        file.write(','.join(title) + '\n')
        for i in range(len(self.folds)):
            row = [
                self.folds[i], self.k2s[i], self.zetas[i], self.kappas[i],
                self.gammas[i], self.scores[i], self.blk_cleans[i],
                self.blk_advs[i]
            ]
            # use a name other than the loop variable inside the
            # comprehension for readability
            file.write(','.join(str(v) for v in row) + '\n')
        # the `with` block closes the file; no explicit close() needed
Example #2
def main():
    parser = ap.ArgumentParser()
    parser.add_argument(
        '-d', '--dataset', type=str, required=True,
        help='Name of the dataset')
    parser.add_argument(
        '--depth', type=int, default=0,
        help='The bit depth for input images. Applies the binary-depth filter when set to a positive value')
    parser.add_argument(
        '-s', '--sigma', type=float, default=0,
        help='The standard deviation of the normal distribution. Applies the Gaussian noise filter when set to a positive value')
    parser.add_argument(
        '-k', '--kernelsize', type=int, default=0,
        help='The kernel size for the median filter. Applies the median filter when set to a positive value')
    parser.add_argument(
        '-i', '--iteration', type=int, default=MAX_ITERATIONS,
        help='the number of times to repeat the experiment')
    parser.add_argument(
        '-v', '--verbose', action='store_true', default=False,
        help='set logger level to debug')
    parser.add_argument(
        '-l', '--savelog', action='store_true', default=False,
        help='save logging file')
    parser.add_argument(
        '-w', '--overwrite', action='store_true', default=False,
        help='overwrite the existing file')
    args = parser.parse_args()
    data_name = args.dataset
    max_iterations = args.iteration
    bit_depth = args.depth
    sigma = args.sigma
    kernel_size = args.kernelsize
    verbose = args.verbose
    save_log = args.savelog
    overwrite = args.overwrite

    # set logging config. Run this before logging anything!
    set_logging(LOG_NAME, data_name, verbose, save_log)

    # Which filters should be applied?
    filter_list = []
    if bit_depth > 0:
        filter_list.append('binary')
    if sigma > 0:
        filter_list.append('normal')
    if kernel_size > 0:
        filter_list.append('median')

    result_filename = name_handler(
        os.path.join('save', LOG_NAME + '_' + data_name + '_' + 'tree'),
        'csv',
        overwrite=overwrite)

    # show parameters
    print(f'[{LOG_NAME}] Running tree model...')
    logger.info('Start at    :%s', get_time_str())
    logger.info('RECEIVED PARAMETERS:')
    logger.info('dataset     :%s', data_name)
    logger.info('iterations  :%d', max_iterations)
    logger.info('bit_depth   :%d', bit_depth)
    logger.info('sigma       :%f', sigma)
    logger.info('kernel_size :%d', kernel_size)
    logger.info('verbose     :%r', verbose)
    logger.info('save_log    :%r', save_log)
    logger.info('overwrite   :%r', overwrite)
    logger.info('filename    :%s', result_filename)

    # NOTE: Why doesn't training on all adversarial examples work?
    # The classification models depend on the training set. The models are
    # not identical, so their adversarial examples differ as well.

    with open(result_filename, 'w') as file:
        file.write(','.join(['Index', 'Clean', 'DecisionTreeAttack']) + '\n')
        i = 1
        while i <= max_iterations:
            num_blk_clean, num_blk_adv = experiment(
                data_name, filter_list, bit_depth, sigma, kernel_size)
            if num_blk_adv == -1:
                # the attack failed for this run; retry without advancing i
                continue
            # write the row before incrementing, so indices run 1..max
            file.write(f'{i},{num_blk_clean},{num_blk_adv}\n')
            i += 1
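The --depth, --sigma and --kernelsize flags above select standard feature-squeezing filters. The project's own filter implementations are not shown on this page; the following is only a rough sketch of what each filter typically does, assuming inputs scaled to [0, 1]:

import numpy as np
from scipy.ndimage import median_filter

def binary_filter(x, bit_depth):
    # Squeeze x to the given bit depth (bit_depth=1 gives black/white).
    levels = 2 ** bit_depth - 1
    return np.round(x * levels) / levels

def normal_noise_filter(x, sigma):
    # Add Gaussian noise with standard deviation sigma, then clip.
    return np.clip(x + np.random.normal(0.0, sigma, x.shape), 0.0, 1.0)

def median_smoothing(x, kernel_size):
    # Smooth with a median filter over a square kernel.
    return median_filter(x, size=kernel_size)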
Example #3
def main():
    parser = ap.ArgumentParser()
    parser.add_argument(
        '-d', '--dataset', type=str, required=True,
        help='Name of the dataset')
    parser.add_argument(
        '-i', '--iteration', type=int, default=MAX_ITERATIONS,
        help='the number of times to repeat the experiment')
    parser.add_argument(
        '-p', '--param', type=str, required=True,
        help='a JSON config file which contains the parameters for the applicability domain')
    parser.add_argument(
        '-v', '--verbose', action='store_true', default=False,
        help='set logger level to debug')
    parser.add_argument(
        '-l', '--savelog', action='store_true', default=False,
        help='save logging file')
    parser.add_argument(
        '-w', '--overwrite', action='store_true', default=False,
        help='overwrite the existing file')
    args = parser.parse_args()
    data_name = args.dataset
    max_iterations = args.iteration
    param_file = args.param
    verbose = args.verbose
    save_log = args.savelog
    overwrite = args.overwrite

    # set logging config. Run this before logging anything!
    set_logging(LOG_NAME, data_name, verbose, save_log)

    # load parameters for Applicability Domain
    with open(param_file) as param_json:
        params = json.load(param_json)

    result_filename = name_handler(
        os.path.join('save', LOG_NAME + '_' + data_name + '_' + 'tree'),
        'csv',
        overwrite=overwrite)

    # show parameters
    print(f'[{LOG_NAME}] Running tree model...')
    logger.info('Start at    :%s', get_time_str())
    logger.info('RECEIVED PARAMETERS:')
    logger.info('dataset     :%s', data_name)
    logger.info('iterations  :%d', max_iterations)
    logger.info('param file  :%s', param_file)
    logger.info('verbose     :%r', verbose)
    logger.info('save_log    :%r', save_log)
    logger.info('overwrite   :%r', overwrite)
    logger.info('filename    :%s', result_filename)
    logger.debug('params     :%s', str(params))

    # NOTE: Why doesn't training on all adversarial examples work?
    # The classification models depend on the training set. The models are
    # not identical, so their adversarial examples differ as well.

    with open(result_filename, 'w') as file:
        file.write(','.join(['Index', 'Clean', 'DecisionTreeAttack']) + '\n')
        for i in range(max_iterations):
            num_blk_clean, num_blk_adv = experiment(data_name, params)
            file.write(f'{i},{num_blk_clean},{num_blk_adv}\n')
Example #4
def main():
    parser = ap.ArgumentParser()
    parser.add_argument('-s',
                        '--size',
                        type=int,
                        required=True,
                        help='the number of training samples')
    parser.add_argument('-f',
                        '--features',
                        type=int,
                        required=True,
                        help='the number of features')
    parser.add_argument('-c',
                        '--classes',
                        type=int,
                        default=2,
                        help='the number of classes')
    parser.add_argument(
        '-i',
        '--iteration',
        type=int,
        default=MAX_ITERATIONS,
        help='the number of times to repeat the experiment')
    parser.add_argument('-e',
                        '--epoch',
                        type=int,
                        required=True,
                        help='the maximum number of epochs for training')
    parser.add_argument('-v',
                        '--verbose',
                        action='store_true',
                        default=False,
                        help='set logger level to debug')
    parser.add_argument('-l',
                        '--savelog',
                        action='store_true',
                        default=False,
                        help='save logging file')
    parser.add_argument('-w',
                        '--overwrite',
                        action='store_true',
                        default=False,
                        help='overwrite the existing file')

    args = parser.parse_args()
    sample_size = args.size
    num_features = args.features
    num_classes = args.classes
    max_iterations = args.iteration
    max_epochs = args.epoch
    verbose = args.verbose
    save_log = args.savelog
    overwrite = args.overwrite

    # set logging config. Run this before logging anything!
    dname = f'SyntheticS{sample_size}F{num_features}C{num_classes}'
    set_logging(LOG_NAME, dname, verbose, save_log)

    print('[{}] Start experiment on {}...'.format(LOG_NAME, dname))
    logger.info('Start at    :%s', get_time_str())
    logger.info('RECEIVED PARAMETERS:')
    logger.info('dataset     :%s', dname)
    logger.info('train size  :%d', sample_size)
    logger.info('num features:%d', num_features)
    logger.info('num classes :%d', num_classes)
    logger.info('iterations  :%d', max_iterations)
    logger.info('max_epochs  :%d', max_epochs)
    logger.info('verbose     :%r', verbose)
    logger.info('save_log    :%r', save_log)
    logger.info('overwrite   :%r', overwrite)

    result_file = name_handler(os.path.join(
        'save', f'{LOG_NAME}_{dname}_i{max_iterations}'),
                               'csv',
                               overwrite=overwrite)

    adv_filename = name_handler(
        os.path.join('save', f'{LOG_NAME}_{dname}_AdvExamples'),
        'csv',
        overwrite=overwrite)

    adv_file = open(adv_filename, 'w')
    adv_file.write(','.join(TITLE_ADV) + '\n')
    res_file = open(result_file, 'w')
    res_file.write(','.join(TITLE_RESULTS) + '\n')
    for i in range(max_iterations):
        since = time.time()
        # generate synthetic data
        x, y = make_classification(
            n_samples=sample_size + 1000,
            n_features=num_features,
            n_informative=num_classes,
            n_redundant=0,
            n_classes=num_classes,
            n_clusters_per_class=1,
        )

        # normalize data
        x_max = np.max(x, axis=0)
        x_min = np.min(x, axis=0)
        # NOTE: Carlini attack expects the data in range [0, 1]
        # x_mean = np.mean(x, axis=0)
        # x = scale_normalize(x, x_min, x_max, x_mean)
        x = scale_normalize(x, x_min, x_max)

        # training/test split
        # NOTE: test set has fixed size
        x_train = np.array(x[:-1000], dtype=np.float32)
        y_train = np.array(y[:-1000], dtype=np.int64)  # np.long was removed in NumPy 1.24
        x_test = np.array(x[-1000:], dtype=np.float32)
        y_test = np.array(y[-1000:], dtype=np.int64)

        # create data container
        data_dict = get_synthetic_dataset_dict(sample_size + 1000, num_classes,
                                               num_features)
        dc = DataContainer(data_dict, get_data_path())

        # assign data manually
        dc.x_train = x_train
        dc.y_train = y_train
        dc.x_test = x_test
        dc.y_test = y_test

        experiment(i, dc, max_epochs, adv_file, res_file)
        time_elapsed = time.time() - since
        print('Completed {} [{}/{}]: {:d}m {:2.1f}s'.format(
            dname, i + 1, max_iterations, int(time_elapsed // 60),
            time_elapsed % 60))

    adv_file.close()
    res_file.close()
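scale_normalize above is a project helper that is not shown on this page. Judging from the call site and the note that the Carlini attack expects data in [0, 1], it is presumably a per-feature min-max rescaling. A minimal sketch under that assumption, not the project's actual implementation:

import numpy as np

def scale_normalize(x, x_min, x_max):
    # Assumed reconstruction: rescale each feature of x into [0, 1]
    # using the per-feature minima/maxima computed at the call site.
    span = x_max - x_min
    span[span == 0] = 1.0  # guard against constant features
    return (x - x_min) / span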
Example #5
def main():
    parser = ap.ArgumentParser()
    parser.add_argument(
        '-m', '--model', type=str, required=True,
        help='a file which contains a pretrained model. The filename should be in "<model>_<dataset>_e<max epochs>[_<date>].pt" format')
    parser.add_argument(
        '-p', '--param', type=str, required=True,
        help='a JSON config file which contains the parameters for the cross validation')
    parser.add_argument(
        '-a', '--adv', type=str,
        help='file name of the adv. examples for testing. If omitted, the program skips testing. The name should be in "<model>_<dataset>_<attack>_adv.npy" format')
    parser.add_argument(
        '-s', '--seed', type=int, default=4096,
        help='the seed for random number generator')
    parser.add_argument(
        '-v', '--verbose', action='store_true', default=False,
        help='set logger level to debug')
    parser.add_argument(
        '-l', '--savelog', action='store_true', default=False,
        help='save logging file')
    parser.add_argument(
        '-i', '--ignore', action='store_true', default=False,
        help='do not save the results; only print them to the terminal')
    parser.add_argument(
        '-w', '--overwrite', action='store_true', default=False,
        help='overwrite the existing file')
    args = parser.parse_args()
    model_file = args.model
    param_file = args.param
    adv_file = args.adv
    seed = args.seed
    verbose = args.verbose
    save_log = args.savelog
    does_ignore = args.ignore
    overwrite = args.overwrite

    model_name, data_name = parse_model_filename(model_file)

    # set logging config. Run this before logging anything!
    set_logging('cross_validation', data_name, verbose, save_log)

    # check files
    for file_path in [model_file, param_file]:
        if not os.path.exists(file_path):
            logger.warning('%s does not exist. Exit.', file_path)
            sys.exit(0)
    if adv_file is not None and not os.path.exists(adv_file):
        logger.warning('%s does not exist. Exit.', adv_file)
        sys.exit(0)

    # read parameters
    with open(param_file) as param_json:
        params = json.load(param_json)

    # show parameters
    print('[cv] Running cross validation on {} with {}...'.format(
        model_file, data_name))
    logger.info('Start at      : %s', get_time_str())
    logger.info('RECEIVED PARAMETERS:')
    logger.info('model file    :%s', model_file)
    logger.info('adv file      :%s', adv_file)
    logger.info('model         :%s', model_name)
    logger.info('dataset       :%s', data_name)
    logger.info('param file    :%s', param_file)
    logger.info('seed          :%d', seed)
    logger.info('verbose       :%r', verbose)
    logger.info('save_log      :%r', save_log)
    logger.info('Ignore saving :%r', does_ignore)
    logger.info('overwrite     :%r', overwrite)
    logger.debug('params       :%s', str(params))

    # load parameters
    k_range = params['k_range']
    z_range = params['z_range']
    kappa_range = params['kappa_range']
    gamma_range = params['gamma_range']
    epsilon = params['epsilon']
    num_folds = params['num_folds']
    batch_size = params['batch_size']
    sample_ratio = params['sample_ratio']
    logger.info('k_range       :%s', str(k_range))
    logger.info('z_range       :%s', str(z_range))
    logger.info('kappa_range   :%s', str(kappa_range))
    logger.info('gamma_range   :%s', str(gamma_range))
    logger.info('epsilon       :%.1f', epsilon)
    logger.info('num_folds     :%d', num_folds)
    logger.info('batch_size    :%d', batch_size)
    logger.info('sample_ratio  :%.1f', sample_ratio)

    # reset seed
    master_seed(seed)

    dc = DataContainer(DATASET_LIST[data_name], get_data_path())
    dc(shuffle=True, normalize=True, size_train=0.8)
    logger.info('Sample size: %d', len(dc))

    Model = get_model(model_name)
    # these models require extra keyword arguments
    if data_name in ('BankNote', 'HTRU2', 'Iris', 'WheatSeed'):
        num_classes = dc.num_classes
        num_features = dc.dim_data[0]
        kwargs = {
            'num_features': num_features,
            'hidden_nodes': num_features*4,
            'num_classes': num_classes,
        }
        model = Model(**kwargs)
    else:
        model = Model()
    logger.info('Use %s model', model.__class__.__name__)
    mc = ModelContainerPT(model, dc)
    mc.load(model_file)
    accuracy = mc.evaluate(dc.x_test, dc.y_test)
    logger.info('Accuracy on test set: %f', accuracy)

    ad = ApplicabilityDomainContainer(
        mc, hidden_model=model.hidden_model, sample_ratio=sample_ratio)
    cross_validation = CrossValidation(
        ad,
        num_folds=num_folds,
        k_range=k_range,
        z_range=z_range,
        kappa_range=kappa_range,
        gamma_range=gamma_range,
        epsilon=epsilon,
    )
    bim_attack = BIMContainer(
        mc,
        eps=0.3,
        eps_step=0.1,
        max_iter=100,
        targeted=False,
    )
    cross_validation.fit(bim_attack)

    # test optimal parameters
    if adv_file is not None:
        postfix = ['adv', 'pred', 'x', 'y']
        data_files = [adv_file.replace('_adv', '_' + s) for s in postfix]
        adv = np.load(data_files[0], allow_pickle=False)
        pred_adv = np.load(data_files[1], allow_pickle=False)
        x = np.load(data_files[2], allow_pickle=False)
        # the '_y' file is treated as the predictions on the clean inputs
        pred = np.load(data_files[3], allow_pickle=False)

        # fetch optimal parameters
        ad = ApplicabilityDomainContainer(
            mc,
            hidden_model=model.hidden_model,
            k2=cross_validation.k2,
            reliability=cross_validation.reliability,
            sample_ratio=sample_ratio,
            kappa=cross_validation.kappa,
            confidence=cross_validation.confidence,
        )
        logger.info('Params: %s', str(ad.params))
        ad.fit()
        blocked_indices = ad.detect(x, pred, return_passed_x=False)
        logger.info('Blocked %d/%d on clean data',
                    len(blocked_indices), len(x))
        blocked_indices = ad.detect(adv, pred_adv, return_passed_x=False)
        logger.info('Blocked %d/%d on adv. examples.',
                    len(blocked_indices), len(adv))

    # save results
    if not does_ignore:
        file_name = name_handler(
            model_name + '_' + data_name, 'csv', overwrite=overwrite)
        cross_validation.save(file_name)
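The -p/--param JSON file for this script must supply the keys read above (k_range, z_range, kappa_range, gamma_range, epsilon, num_folds, batch_size, sample_ratio). A sample config can be generated from Python; every value here is an illustrative placeholder, not a tuned setting:

import json

params = {
    'k_range': [2, 16],        # placeholder ranges; the real search
    'z_range': [1.0, 3.0],     # spaces depend on the experiment
    'kappa_range': [2, 16],
    'gamma_range': [0.1, 1.0],
    'epsilon': 0.1,
    'num_folds': 5,
    'batch_size': 128,
    'sample_ratio': 1.0,
}
with open('cv_params.json', 'w') as f:
    json.dump(params, f, indent=2)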
Example #6
def main():
    parser = ap.ArgumentParser()
    parser.add_argument(
        '-d', '--dataset', type=str, required=True,
        help='Name of the dataset')
    parser.add_argument(
        '-m', '--model', type=str, required=True,
        help='Name of the model')
    parser.add_argument(
        '-i', '--iteration', type=int, default=MAX_ITERATIONS,
        help='the number of times to repeat the experiment')
    parser.add_argument(
        '-e', '--epoch', type=int, required=True,
        help='the maximum number of epochs for training')
    parser.add_argument(
        '-v', '--verbose', action='store_true', default=False,
        help='set logger level to debug')
    parser.add_argument(
        '-l', '--savelog', action='store_true', default=False,
        help='save logging file')
    parser.add_argument(
        '-w', '--overwrite', action='store_true', default=False,
        help='overwrite the existing file')

    # NOTE: the JSON files for the parameters are hard-coded.
    # We expect to run multiple attacks and defences in one iteration.
    args = parser.parse_args()
    dname = args.dataset
    mname = args.model
    max_iterations = args.iteration
    max_epochs = args.epoch
    verbose = args.verbose
    save_log = args.savelog
    overwrite = args.overwrite

    # set logging config. Run this before logging anything!
    set_logging(LOG_NAME, dname, verbose, save_log)

    print('[{}] Start experiment on {} {} i{} e{}...'.format(
        LOG_NAME, mname, dname, max_iterations, max_epochs))
    logger.info('Start at    :%s', get_time_str())
    logger.info('RECEIVED PARAMETERS:')
    logger.info('dataset     :%s', dname)
    logger.info('model       :%s', mname)
    logger.info('iterations  :%d', max_iterations)
    logger.info('max_epochs  :%d', max_epochs)
    logger.info('verbose     :%r', verbose)
    logger.info('save_log    :%r', save_log)
    logger.info('overwrite   :%r', overwrite)

    adv_file = name_handler(
        os.path.join('save', f'{LOG_NAME}_{dname}_{mname}_acc'),
        'csv',
        overwrite=overwrite)
    result_file = name_handler(
        os.path.join(
            'save', f'{LOG_NAME}_{dname}_{mname}_res_i{max_iterations}'),
        'csv',
        overwrite=overwrite)

    adv_file = open(adv_file, 'w')
    adv_file.write(','.join(TITLE_ADV) + '\n')
    res_file = open(result_file, 'w')
    res_file.write(','.join(TITLE_RESULTS) + '\n')
    for i in range(max_iterations):
        since = time.time()
        experiment(i, dname, mname, max_epochs, adv_file, res_file)
        time_elapsed = time.time() - since
        print('Completed {} [{}/{}]: {:d}m {:2.1f}s'.format(
            dname,
            i+1,
            max_iterations,
            int(time_elapsed // 60),
            time_elapsed % 60))

    adv_file.close()
    res_file.close()
Example #7
def test_name_handler(self):
    x = name_handler('test', 'test', overwrite=True)
    self.assertEqual(x, 'test.test')
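name_handler is used throughout these examples but not defined on this page. The test above pins one behavior: with overwrite=True it simply joins the base name and extension. A minimal sketch consistent with that test, assuming overwrite=False appends a counter to avoid clobbering an existing file:

import os

def name_handler(base, extension, overwrite=False):
    # Assumed reconstruction from usage and the test above, not the
    # project's actual implementation.
    filename = f'{base}.{extension}'
    if overwrite:
        return filename
    counter = 1
    while os.path.exists(filename):
        filename = f'{base}_{counter}.{extension}'
        counter += 1
    return filename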