import os
import time

import numpy as np
import pandas as pd
import tensorflow as tf

import GAN as gan

print(tf.__version__)
tf.keras.backend.set_floatx('float32')
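# #### CelebA dataset wrapper
# This script uses a CelebA wrapper object but never imports or defines it.
# The stand-in below is a minimal, assumed sketch covering only the interface
# the script actually touches (.attributes, .features_name, .images_folder);
# the CSV name matches the Kaggle celeba-dataset layout. Replace it with the
# project's real wrapper if that module is available.
class CelebA:
    def __init__(self, main_folder='/content/celeba-dataset',
                 selected_features=None):
        self.main_folder = main_folder
        self.images_folder = os.path.join(
            main_folder, 'img_align_celeba/img_align_celeba')
        # list_attr_celeba.csv stores attributes as -1/1; remap them to 0/1.
        df = pd.read_csv(os.path.join(main_folder, 'list_attr_celeba.csv'))
        if selected_features:
            df = df[['image_id'] + selected_features]
        self.features_name = list(df.columns[1:])
        self.attributes = df.replace(-1, 0)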
# An alternative preparation used during development selected only bearded men
# (No_Beard == 0) without hats or glasses:
#
#     features = ['Male', 'Wearing_Hat', 'Eyeglasses', 'No_Beard']
#     celeba = CelebA(selected_features=features,
#                     main_folder='/content/celeba-dataset')
#     feat_df = celeba.attributes
#     feat_df = feat_df[(feat_df.Male == 1) & (feat_df.Wearing_Hat == 0)
#                       & (feat_df.Eyeglasses == 0) & (feat_df.No_Beard == 0)]
#     image_list = feat_df['image_id'].tolist()
def main(dataset_folder="/content/celeba-dataset"):
    # ### Load and prepare the dataset
    # The male/female attribute is called "Male" in the dataset: -1 for
    # female, 1 for male (the wrapper remaps these to 0/1).
    features = ['Male']
    celeba = CelebA(selected_features=features, main_folder=dataset_folder)
    feat_df = celeba.attributes
    feat_df['image_id'] = feat_df['image_id'].apply(
        lambda x: dataset_folder + '/img_align_celeba/img_align_celeba/' + x)

    # Build a balanced subset: 5000 male and 5000 female images.
    feat_male = feat_df[feat_df.Male == 1][:5000]
    feat_female = feat_df[feat_df.Male == 0][:5000]
    joined = pd.concat([feat_male, feat_female])
    print(len(joined))

    IMG_HEIGHT = 128
    IMG_WIDTH = 128
    BUFFER_SIZE = 3000
    BATCH_SIZE = 100
    NUM_IMAGES_USED = len(joined)
    noise_dim = 256
    STEPS_PER_EPOCH = np.ceil(NUM_IMAGES_USED / BATCH_SIZE)
    CLASS_NAMES = celeba.features_name
    img_shape = (IMG_HEIGHT, IMG_WIDTH, 3)

    def _parse_function(filename, label):
        # Load and decode the image file.
        image_string = tf.io.read_file(filename)
        image_decoded = tf.image.decode_jpeg(image_string, channels=3)
        image = tf.cast(image_decoded, tf.float32)
        # Resize, then normalize to [0, 1].
        image = tf.image.resize(image, (IMG_HEIGHT, IMG_WIDTH),
                                method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
        image = image / 255.0
        return image, label

    # Create the dataset and map the decoder over it. This could be improved
    # by loading the images up front instead of mapping them at training time.
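    # One hedged option for the improvement mentioned above (assuming the
    # decoded 128x128 images fit in memory): cache after the map and prefetch.
    # `training_images` is created just below; this sketch is not wired in.
    #
    #     ds = training_images.map(_parse_function,
    #                              num_parallel_calls=tf.data.AUTOTUNE)
    #     ds = ds.cache().shuffle(BUFFER_SIZE).batch(BATCH_SIZE)
    #     ds = ds.prefetch(tf.data.AUTOTUNE)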
    training_images = tf.data.Dataset.from_tensor_slices(
        (list(joined['image_id'][:NUM_IMAGES_USED]),
         list(joined['Male'][:NUM_IMAGES_USED])))
    training_dataset = training_images.map(_parse_function)
    # Shuffle and batch.
    training_dataset = training_dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE)

    # print(f"Device being used {tf.test.gpu_device_name()}")
    generator = gan.Generator(noise_dim)
    generator.summary()
    discriminator = gan.Discriminator()
    discriminator.summary()

    # Label smoothing: positive labels in [0.9, 1.0) and negative labels in
    # [0.0, 0.1), with roughly 5% of each batch flipped as a noise trick.
    def smooth_pos_and_trick(y):
        tensor = np.random.uniform(0.9, 1, y.shape[0])
        to_subst = np.ceil(y.shape[0] * 0.05)
        targets = np.random.choice(y.shape[0], int(to_subst))
        for idx in targets:
            tensor[idx] = abs(tensor[idx] - 1)
        return tf.convert_to_tensor(tensor, dtype=tf.float32)

    def smooth_neg_and_trick(y):
        tensor = np.random.uniform(0, 0.1, y.shape[0])
        to_subst = np.ceil(y.shape[0] * 0.05)
        targets = np.random.choice(y.shape[0], int(to_subst))
        for idx in targets:
            tensor[idx] = abs(tensor[idx] - 1)
        return tf.convert_to_tensor(tensor, dtype=tf.float32)

    checkpoint_dir = './training_checkpoints'
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
    checkpoint = tf.train.Checkpoint(generator=generator,
                                     discriminator=discriminator)
    manager = tf.train.CheckpointManager(checkpoint, directory=checkpoint_dir,
                                         max_to_keep=3)

    EPOCHS = 100
    train_GEN = 1   # Train the generator every batch.
    train_DISC = 1  # Train the discriminator every batch.

    def train(g_model, d_model, gan_model, dataset, latent_dim=100,
              n_epochs=100, train_GEN=1, train_DISC=1):
        checkpoint.restore(manager.latest_checkpoint)
        if manager.latest_checkpoint:
            print("Restored from {}".format(manager.latest_checkpoint))
        else:
            print("Initializing from scratch.")

        logdir = './logs/func/'
        if not os.path.exists(logdir):
            os.makedirs(logdir)
        writer = tf.summary.create_file_writer(logdir)
        tf.summary.trace_on(graph=True, profiler=True)

        # Manually enumerate epochs.
        cycle = 0
        start = time.time()
        for i in range(n_epochs):
            x = 0
            # Enumerate batches over the training set.
            for image_batch in dataset:
                n_batch = image_batch[0].shape[0]
                if cycle % 50 == 0:
                    print(f"Batch: {x}/{STEPS_PER_EPOCH}")
                x += 1
                cycle += 1
                # Randomly selected 'real' samples (images only, not labels).
                X_real = image_batch[0]
                y_real = tf.ones((n_batch, 1))
                # Smoothing.
                y_real = smooth_pos_and_trick(y_real)
                if cycle % train_DISC == 0:
                    # Update discriminator weights on real samples.
                    d_loss1, _ = d_model.train_on_batch(X_real, y_real)
                    # Generate 'fake' examples.
                    X_fake, y_fake = gan.generate_fake_samples(
                        g_model, latent_dim, n_batch)
                    # Smoothing.
                    y_fake = smooth_neg_and_trick(y_fake)
                    # Update discriminator weights on fake samples.
                    d_loss2, _ = d_model.train_on_batch(X_fake, y_fake)
                if cycle % train_GEN == 0:
                    # Prepare points in latent space as input for the generator.
                    X_gan = gan.generate_latent_points(latent_dim, n_batch)
                    # Create inverted labels for the fake samples.
                    y_gan = np.ones((n_batch, 1))
                    # Update the generator via the discriminator's error.
                    g_loss = gan_model.train_on_batch(X_gan, y_gan)
                if cycle % 10 == 0:
                    with writer.as_default():
                        tf.summary.scalar('Disc loss real', d_loss1, step=cycle)
                        tf.summary.scalar('Disc loss fake', d_loss2, step=cycle)
                        tf.summary.scalar('Gen Loss', g_loss, step=cycle)
            # Summarize the losses for this epoch.
            print('Epoch: %d, Loss: D_real = %.3f, D_fake = %.3f, G = %.3f'
                  % (i + 1, d_loss1, d_loss2, g_loss))
            # Evaluate the model performance every other epoch.
            if (i + 1) % 2 == 0:
                gan.summarize_performance(i, g_model, d_model, image_batch,
                                          latent_dim)
            # Save a checkpoint every 20 epochs.
            if (i + 1) % 20 == 0:
                checkpoint.save(file_prefix=checkpoint_prefix)
        print('Total time for training {} epochs is {} sec'.format(
            n_epochs, time.time() - start))
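    # Hedged aside: the two smoothing helpers above can also be written as one
    # vectorized function with no Python loop. This sketch assumes the same
    # behavior (uniform labels in [low, high), ~5% of them flipped) and is not
    # wired into the training loop.
    def smooth_labels_sketch(y, low, high, flip_frac=0.05):
        # Uniform smoothed labels, then flip a small random subset.
        t = np.random.uniform(low, high, y.shape[0])
        flips = np.random.choice(y.shape[0],
                                 int(np.ceil(y.shape[0] * flip_frac)))
        t[flips] = np.abs(t[flips] - 1)
        return tf.convert_to_tensor(t, dtype=tf.float32)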
    # Create the combined GAN model and train on the GPU.
    theGan = gan.define_gan(generator, discriminator)
    with tf.device('/device:GPU:0'):
        train(generator, discriminator, theGan, training_dataset,
              noise_dim, EPOCHS, train_GEN, train_DISC)


if __name__ == "__main__":
    main()
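# #### Assumed GAN-module interface
# The GAN module imported at the top is not shown in this file. The sketches
# below are assumptions about the three helpers the training loop calls,
# written in the usual Keras composition style and named with a leading
# underscore so they never shadow the real gan.* functions. They are for
# reference only and are not called anywhere above.
def _define_gan_sketch(g_model, d_model):
    # Freeze the discriminator inside the combined model so that
    # gan_model.train_on_batch only updates the generator's weights.
    d_model.trainable = False
    model = tf.keras.Sequential([g_model, d_model])
    model.compile(loss='binary_crossentropy',
                  optimizer=tf.keras.optimizers.Adam(1e-4, beta_1=0.5))
    return model


def _generate_latent_points_sketch(latent_dim, n_samples):
    # Standard-normal noise vectors as generator input.
    return np.random.randn(n_samples, latent_dim).astype(np.float32)


def _generate_fake_samples_sketch(g_model, latent_dim, n_samples):
    # Generator output paired with all-zero ("fake") labels.
    x_input = _generate_latent_points_sketch(latent_dim, n_samples)
    X = g_model.predict(x_input, verbose=0)
    return X, np.zeros((n_samples, 1))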