Example #1
def bootstrap(hparams):

    output_dir = util.create_run_dir('outputs/last_layer/', hparams)
    util.write_to_csv(os.path.join(output_dir, 'hparams.csv'), hparams)

    dataset_train, dataset_val = input_data(hparams)

    epochs = hparams['epochs']
    lr = hparams['lr']
    batch_size = hparams['batch_size']
    samples = hparams['samples']
    p_dropout = hparams['p_dropout']
    nb_last_layers = hparams['nb_last_layers']

    model = build_last_layer(p_dropout=p_dropout,
                             num_last_layers=nb_last_layers)

    model_path = 'saved_models/{}/{}_last_layer_{}.h5'.format(
        hparams['dataset'], hparams['dataset'].split('-')[0], nb_last_layers)

    model.compile(optimizer=tf.keras.optimizers.SGD(lr=lr),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    name_in = os.path.join(output_dir, 'p_in.h5')
    file_in = h5py.File(name_in, 'a')

    shape_in = ((NUM_TEST_EXAMPLES // batch_size) * batch_size, N_CLASS,
                samples)

    proba_in = file_in.create_dataset(
        'proba',
        shape_in,
        # dtype='f2',
        compression='gzip')

    for i in np.arange(samples):
        dataset_train, dataset_val = input_data(hparams, bootstrap=True)
        model.load_weights(model_path, by_name=True)
        hist = model.fit(dataset_train,
                         epochs=epochs,
                         steps_per_epoch=NUM_TRAINING_EXAMPLES // batch_size,
                         verbose=1,
                         validation_data=dataset_val,
                         validation_steps=NUM_TEST_EXAMPLES // batch_size)
        print('End of bootstrap {}'.format(i))

        # computing probabilities
        proba_in[:, :, i] = model.predict(
            dataset_val, steps=NUM_TEST_EXAMPLES // batch_size)

    file_in.close()
    del model
    gc.collect()
    print('End of sampling - bootstrap.')
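
These snippets all come from one module and rely on its surrounding context: helpers like util, input_data, build_last_layer, and sgld/tf_sgld, plus constants such as NUM_TRAINING_EXAMPLES, NUM_TEST_EXAMPLES, and N_CLASS, along with imports roughly like the sketch below. As a usage illustration, a hypothetical hparams dict covering exactly the keys bootstrap reads (the values are assumptions, not values from the source):

# Imports the snippets assume (a sketch; the exact set lives in the source module).
import gc
import os

import h5py
import numpy as np
import tensorflow as tf
from tensorflow import keras

# Hypothetical invocation of Example #1; the keys mirror the hparams
# lookups in bootstrap(), the values are illustrative only.
hparams = {
    'dataset': 'cifar10-first-10',
    'epochs': 10,
    'lr': 0.01,
    'batch_size': 128,
    'samples': 10,          # number of bootstrap resamples
    'p_dropout': 0.5,
    'nb_last_layers': 1,
}
bootstrap(hparams)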
Example #2
def dropout(hparams):

    output_dir = util.create_run_dir('outputs/last_layer/', hparams)
    util.write_to_csv(os.path.join(output_dir, 'hparams.csv'), hparams)

    (features_train_in, y_train_in), (features_val_in, y_val_in), \
      features_val_out = input_data(hparams)

    n_class = y_train_in.shape[1]
    epochs = hparams['epochs']
    lr = hparams['lr']
    batch_size = hparams['batch_size']
    samples = hparams['samples']
    p_dropout = hparams['p_dropout']

    model = build_last_layer(features_train_in, n_class, p_dropout=p_dropout)
    model_path = 'saved_models/{}/{}.h5'.format(
        hparams['dataset'], hparams['dataset'].split('-')[0])

    model.compile(optimizer=keras.optimizers.SGD(lr=lr),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    name_in = os.path.join(output_dir, 'p_in.h5')
    file_in = h5py.File(name_in, 'a')

    shape_in = (features_val_in.shape[0], n_class, samples)

    proba_in = file_in.create_dataset(
        'proba',
        shape_in,
        # dtype='f2',
        compression='gzip')

    if features_val_out is not None:
        name_out = os.path.join(output_dir, 'p_out.h5')
        file_out = h5py.File(name_out, 'a')
        shape_out = (features_val_out.shape[0], n_class, samples)
        proba_out = file_out.create_dataset(
            'proba',
            shape_out,
            # dtype='f2',
            compression='gzip')

    model.load_weights(model_path, by_name=True)
    model.fit(features_train_in,
              y_train_in,
              batch_size=batch_size,
              epochs=epochs,
              verbose=1,
              validation_data=(features_val_in, y_val_in))
    # Sanity check
    score = model.evaluate(features_val_in, y_val_in, verbose=0)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])
    print('End of training')

    for i in np.arange(samples):
        # computing probabilities
        proba_in[:, :, i] = model.predict(features_val_in)
        if features_val_out is not None:
            proba_out[:, :, i] = model.predict(features_val_out)

    file_in.close()
    if features_val_out is not None:
        file_out.close()
    print('End of sampling - dropout.')
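
One caveat with this Monte Carlo dropout loop: it only produces distinct samples if build_last_layer keeps dropout stochastic at inference, since stock Keras disables Dropout inside model.predict. A minimal sketch of the usual pattern (passing training=True when calling the dropout layer), assuming a simple dense head; mc_dropout_head and its arguments are illustrative, not from the source:

from tensorflow import keras

def mc_dropout_head(input_dim, n_class, p_dropout):
    # training=True keeps dropout active during predict(), so repeated
    # model.predict() calls yield different Monte Carlo samples.
    inputs = keras.layers.Input(shape=(input_dim,))
    x = keras.layers.Dropout(p_dropout)(inputs, training=True)
    outputs = keras.layers.Dense(n_class, activation='softmax')(x)
    return keras.Model(inputs, outputs)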
Example #3
def sgd_sgld(hparams):

    output_dir = util.create_run_dir('outputs/last_layer/', hparams)
    util.write_to_csv(os.path.join(output_dir, 'hparams.csv'), hparams)

    (features_train_in, y_train_in), (features_val_in, y_val_in), \
      features_val_out = input_data(hparams)

    n_class = y_train_in.shape[1]
    samples = hparams['samples']
    lr = hparams['lr']
    batch_size = hparams['batch_size']

    params = {
        'optimizer': None,
        'samples': samples,
        'output_dir': output_dir,
        'n_class': n_class
    }

    class Prediction(keras.callbacks.Callback):
        def __init__(self, params, features_val_in, features_val_out):
            super(Prediction, self).__init__()

            self.index = 0

            # out_of_dist is True when *no* out-of-distribution
            # validation set was provided.
            self.out_of_dist = features_val_out is None

            name_in = os.path.join(params['output_dir'],
                                   'p_{}_in.h5'.format(params['optimizer']))
            self.file_in = h5py.File(name_in, 'a')

            shape_in = (features_val_in.shape[0], params['n_class'],
                        params['samples'])

            self.proba_in = self.file_in.create_dataset(
                'proba',
                shape_in,
                # dtype='f2',
                compression='gzip')
            self.features_val_in = features_val_in

            if not self.out_of_dist:
                name_out = os.path.join(
                    params['output_dir'],
                    'p_{}_out.h5'.format(params['optimizer']))
                self.file_out = h5py.File(name_out, 'a')
                shape_out = (features_val_out.shape[0], params['n_class'],
                             params['samples'])
                self.proba_out = self.file_out.create_dataset(
                    'proba',
                    shape_out,
                    # dtype='f2',
                    compression='gzip')
                self.features_val_out = features_val_out

        def on_epoch_end(self, epoch, logs=None):
            self.proba_in[:, :, self.index] = self.model.predict(
                self.features_val_in)
            if not self.out_of_dist:
                self.proba_out[:, :, self.index] = self.model.predict(
                    self.features_val_out)
            self.index += 1

        def on_train_end(self, logs=None):
            self.file_in.close()
            if not self.out_of_dist:
                self.file_out.close()

    model = build_last_layer(features_train_in, n_class)
    model_path = 'saved_models/{}/{}.h5'.format(
        hparams['dataset'], hparams['dataset'].split('-')[0])

    for opt, optimizer in zip(['sgd', 'sgld'], [
            keras.optimizers.SGD(lr=lr),
            sgld.SGLD(features_train_in.shape[0], lr=lr)
    ]):
        model.load_weights(model_path, by_name=True)
        params['optimizer'] = opt
        model.compile(optimizer=optimizer,
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])
        mc = Prediction(params, features_val_in, features_val_out)

        hist = model.fit(features_train_in,
                         y_train_in,
                         batch_size=batch_size,
                         epochs=samples,
                         verbose=1,
                         validation_data=(features_val_in, y_val_in),
                         callbacks=[mc])
        print('End of sampling using {}'.format(opt))

    return hist
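
sgld.SGLD is defined elsewhere in the repo; conceptually, stochastic gradient Langevin dynamics is an SGD step plus Gaussian noise whose variance matches the step size, so late-training iterates behave like posterior samples (here, one captured per epoch by the Prediction callback). A NumPy sketch of the update rule, as an illustration rather than the repo's implementation:

import numpy as np

def sgld_step(theta, grad_log_posterior, step_size, rng):
    # theta <- theta + (step_size / 2) * grad log p(theta | data) + noise,
    # with noise ~ N(0, step_size * I). As step_size decays, the iterates
    # approximate samples from the posterior.
    noise = rng.normal(scale=np.sqrt(step_size), size=theta.shape)
    return theta + 0.5 * step_size * grad_log_posterior + noise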
Example #4
def dropout(hparams):
  
  n_class = hparams['n_class']
  
  output_dir = util.create_run_dir('outputs/full_network/', hparams)
  util.write_to_csv(os.path.join(output_dir, 'hparams.csv'), hparams)
    
  (features_train_in, y_train_in), (features_val_in, y_val_in), \
    features_val_out, index = input_data(hparams)
    
  np.save(os.path.join(output_dir, 'index.npy'), index)
  
  epochs = hparams['epochs']
  lr = hparams['lr']
  batch_size = hparams['batch_size']
  samples = hparams['samples']
  p_dropout = hparams['p_dropout']

  model = build_model(n_class, p_dropout=p_dropout)
  if hparams['dataset'] in ['cifar10-first-10', 'cifar100-first-100']:
    model_path = 'saved_models/cifar-full-network/{}vgg.h5'.format(hparams['dataset'].split('-')[0])
  else:
    model_path = 'saved_models/{}/{}.h5'.format(hparams['dataset'], hparams['dataset'].split('-')[0])

  model.compile(optimizer=keras.optimizers.SGD(lr=lr),
                loss='categorical_crossentropy', 
                metrics=['accuracy'])
  
  # data augmentation
  datagen = ImageDataGenerator(
      featurewise_center=False,  # set input mean to 0 over the dataset
      samplewise_center=False,  # set each sample mean to 0
      featurewise_std_normalization=False,  # divide inputs by std of the dataset
      samplewise_std_normalization=False,  # divide each input by its std
      zca_whitening=False,  # apply ZCA whitening
      rotation_range=15,  # randomly rotate images in the range (degrees, 0 to 180)
      width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
      height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
      horizontal_flip=True,  # randomly flip images
      vertical_flip=False)  # randomly flip images
  # Compute quantities required for feature-wise normalization
  # (std, mean, and principal components if ZCA whitening is applied);
  # a no-op here since all feature-wise options above are disabled.
  datagen.fit(features_train_in)
  
  name_in = os.path.join(output_dir, 'p_in.h5')
  file_in = h5py.File(name_in, 'a')
  
  shape_in = (features_val_in.shape[0], n_class, samples)
  
  proba_in = file_in.create_dataset('proba', 
                                    shape_in,
                                    # dtype='f2',
                                    compression='gzip')

  if features_val_out is not None:
    name_out = os.path.join(output_dir, 'p_out.h5')
    file_out = h5py.File(name_out, 'a')
    shape_out = (features_val_out.shape[0], n_class, samples)
    proba_out = file_out.create_dataset('proba', 
                                        shape_out,
                                        # dtype='f2',
                                        compression='gzip')      

  model.load_weights(model_path)
  model.fit_generator(datagen.flow(features_train_in, y_train_in,
                                   batch_size=batch_size),
                      steps_per_epoch=features_train_in.shape[0] // batch_size,
                      epochs=epochs,
                      verbose=1,
                      validation_data=(features_val_in, y_val_in))
  # Sanity check
  score = model.evaluate(features_val_in, y_val_in, verbose=0)
  print('Test loss:', score[0])
  print('Test accuracy:', score[1])
  print('End of training')

  for i in np.arange(samples):
    # computing probabilities
    proba_in[:, :, i] = model.predict(features_val_in)
    if features_val_out is not None:
      proba_out[:, :, i] = model.predict(features_val_out)
  
  file_in.close()
  if features_val_out is not None:
    file_out.close()
  print('End of sampling - dropout.')
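
After a run, the stored tensor of shape (examples, classes, samples) can be reduced to a predictive distribution and an uncertainty score. A minimal read-back sketch; the path is hypothetical (it depends on what util.create_run_dir produced):

import h5py
import numpy as np

with h5py.File('outputs/full_network/run_0/p_in.h5', 'r') as f:  # hypothetical run dir
    proba = f['proba'][...]       # shape: (n_examples, n_class, samples)

p_mean = proba.mean(axis=-1)      # Monte Carlo predictive distribution
predictive_entropy = -(p_mean * np.log(p_mean + 1e-12)).sum(axis=1)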
Example #5
def sgd_sgld(hparams):  
  
  n_class = hparams['n_class']
  
  output_dir = util.create_run_dir('outputs/full_network/', hparams)
  util.write_to_csv(os.path.join(output_dir, 'hparams.csv'), hparams)
    
  (features_train_in, y_train_in), (features_val_in, y_val_in), \
    features_val_out, index = input_data(hparams)
    
  np.save(os.path.join(output_dir, 'index.npy'), index)

  samples = hparams['samples']
  lr = hparams['lr']
  batch_size = hparams['batch_size']

  params = {'optimizer': None,
            'samples': samples,
            'output_dir': output_dir,
            'n_class': n_class
            }

  class Prediction(keras.callbacks.Callback):

    def __init__(self, params, features_val_in, features_val_out):
      super(Prediction, self).__init__()
      
      self.index = 0
      
      # out_of_dist is True when *no* out-of-distribution
      # validation set was provided.
      self.out_of_dist = features_val_out is None
      
      name_in = os.path.join(params['output_dir'], 
                             'p_{}_in.h5'.format(params['optimizer']))
      self.file_in = h5py.File(name_in, 'a')
      
      shape_in = (features_val_in.shape[0], params['n_class'], 
                  params['samples'])
      
      self.proba_in = self.file_in.create_dataset('proba', 
                                                  shape_in,
                                                  # dtype='f2',
                                                  compression='gzip')
      self.features_val_in = features_val_in

      if not self.out_of_dist:
        name_out = os.path.join(params['output_dir'],
                                'p_{}_out.h5'.format(params['optimizer']))
        self.file_out = h5py.File(name_out, 'a')
        shape_out = (features_val_out.shape[0], params['n_class'], 
                     params['samples'])
        self.proba_out = self.file_out.create_dataset('proba', 
                                                      shape_out,
                                                      # dtype='f2',
                                                      compression='gzip')      
        self.features_val_out = features_val_out

    def on_epoch_end(self, epoch, logs=None):
      self.proba_in[:, :, self.index] = self.model.predict(self.features_val_in)
      if not self.out_of_dist:
        self.proba_out[:, :, self.index] = self.model.predict(self.features_val_out)
      self.index += 1
      
    def on_train_end(self, logs=None):
      self.file_in.close()
      if not self.out_of_dist:
        self.file_out.close()
    
  model = build_model(n_class)
  if hparams['dataset'] in ['cifar10-first-10', 'cifar100-first-100']:
    model_path = 'saved_models/cifar-full-network/{}vgg.h5'.format(hparams['dataset'].split('-')[0])
  else:
    model_path = 'saved_models/{}/{}.h5'.format(hparams['dataset'], hparams['dataset'].split('-')[0])
  
  # data augmentation
  datagen = ImageDataGenerator(
      featurewise_center=False,  # set input mean to 0 over the dataset
      samplewise_center=False,  # set each sample mean to 0
      featurewise_std_normalization=False,  # divide inputs by std of the dataset
      samplewise_std_normalization=False,  # divide each input by its std
      zca_whitening=False,  # apply ZCA whitening
      rotation_range=15,  # randomly rotate images in the range (degrees, 0 to 180)
      width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
      height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
      horizontal_flip=True,  # randomly flip images
      vertical_flip=False)  # randomly flip images
  # Compute quantities required for feature-wise normalization
  # (std, mean, and principal components if ZCA whitening is applied);
  # a no-op here since all feature-wise options above are disabled.
  datagen.fit(features_train_in)

  for opt, optimizer in zip(['sgd', 'sgld'], 
                            [keras.optimizers.SGD(lr=lr), 
                             sgld.SGLD(features_train_in.shape[0], lr=lr)]):
    model.load_weights(model_path)
    params['optimizer'] = opt
    model.compile(optimizer=optimizer,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    mc = Prediction(params, features_val_in, features_val_out)

    hist = model.fit_generator(datagen.flow(features_train_in, y_train_in,
                                            batch_size=batch_size),
                               steps_per_epoch=features_train_in.shape[0] // batch_size,
                               epochs=samples,
                               verbose=1,
                               validation_data=(features_val_in, y_val_in),
                               callbacks=[mc])
    
    print('End of sampling using {}'.format(opt))

  return hist
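
Since the loop writes one file per optimizer (p_sgd_in.h5 and p_sgld_in.h5), the two sampling schemes can be compared after the fact. A minimal sketch under the assumption that both runs finished and share a run directory (the path itself is hypothetical):

import h5py
import numpy as np

def mean_predictive(path):
    with h5py.File(path, 'r') as f:
        return f['proba'][...].mean(axis=-1)  # average over per-epoch samples

p_sgd = mean_predictive('outputs/full_network/run_0/p_sgd_in.h5')    # hypothetical
p_sgld = mean_predictive('outputs/full_network/run_0/p_sgld_in.h5')  # hypothetical
print('max abs disagreement:', np.abs(p_sgd - p_sgld).max())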
Example #6
def sgd_sgld(hparams):

    output_dir = util.create_run_dir('outputs/last_layer/', hparams)
    util.write_to_csv(os.path.join(output_dir, 'hparams.csv'), hparams)

    dataset_train, dataset_val = input_data(hparams)

    n_class = N_CLASS
    samples = hparams['samples']
    lr = hparams['lr']
    batch_size = hparams['batch_size']
    nb_last_layers = hparams['nb_last_layers']

    params = {
        'optimizer': None,
        'samples': samples,
        'batch_size': batch_size,
        'output_dir': output_dir,
        'n_class': n_class
    }

    class Prediction(keras.callbacks.Callback):
        def __init__(self, params, dataset_val):
            super(Prediction, self).__init__()

            self.index = 0

            name_in = os.path.join(params['output_dir'],
                                   'p_{}_in.h5'.format(params['optimizer']))
            self.file_in = h5py.File(name_in, 'a')

            self.batch_size = params['batch_size']

            shape_in = ((NUM_TEST_EXAMPLES // self.batch_size) *
                        self.batch_size, params['n_class'], params['samples'])

            self.proba_in = self.file_in.create_dataset(
                'proba',
                shape_in,
                # dtype='f2',
                compression='gzip')
            self.dataset_val = dataset_val

        def on_epoch_end(self, epoch, logs=None):
            nb_steps = NUM_TEST_EXAMPLES // self.batch_size
            self.proba_in[:, :, self.index] = self.model.predict(
                self.dataset_val, steps=nb_steps)
            self.index += 1

        def on_train_end(self, logs=None):
            self.file_in.close()

    model = build_last_layer(num_last_layers=nb_last_layers)
    model_path = 'saved_models/{}/{}_last_layer_{}.h5'.format(
        hparams['dataset'], hparams['dataset'].split('-')[0], nb_last_layers)

    for opt, optimizer in zip(['sgd', 'sgld'], [
            tf.keras.optimizers.SGD(lr=lr),
            tf_sgld.SGLD(NUM_TRAINING_EXAMPLES, lr=lr)
    ]):
        model.load_weights(model_path, by_name=True)
        params['optimizer'] = opt
        model.compile(optimizer=optimizer,
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])
        mc = Prediction(params, dataset_val)

        hist = model.fit(dataset_train,
                         epochs=samples,
                         steps_per_epoch=NUM_TRAINING_EXAMPLES // batch_size,
                         verbose=1,
                         validation_data=dataset_val,
                         validation_steps=NUM_TEST_EXAMPLES // batch_size,
                         callbacks=[mc])
        print('End of sampling using {}'.format(opt))

    del model
    gc.collect()
    return hist
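
In this tf.data variant, the callback calls model.predict(self.dataset_val, steps=nb_steps) once per epoch, which only works if the validation dataset repeats and its batches line up with the (NUM_TEST_EXAMPLES // batch_size) * batch_size rows allocated in shape_in. A sketch of a compatible pipeline, assuming input_data builds something along these lines (an assumption, not the source's implementation):

import tensorflow as tf

def make_val_dataset(features, labels, batch_size):
    # drop_remainder matches the truncated row count in shape_in, and
    # repeat() lets both fit() and the per-epoch predict() iterate the
    # dataset as many times as needed.
    ds = tf.data.Dataset.from_tensor_slices((features, labels))
    return ds.batch(batch_size, drop_remainder=True).repeat()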