class PC2Pix():
    """GAN wrapper: builds generator, discriminator and adversarial models.

    The generator is conditioned on a noise vector, a point-cloud code and
    elevation/azimuth codes. The discriminator outputs a real/fake score and
    also regresses the point-cloud, elevation and azimuth codes (see the four
    losses compiled in ``build_gan``).
    """

    def __init__(self,
                 ptcloud_ae=None,
                 gw=None,
                 dw=None,
                 pc_code_dim=32,
                 batch_size=64,
                 color=True,
                 gpus=1,
                 norm=False,
                 category='all'):
        """Open the train/test data sources and build the GAN.

        Args:
            ptcloud_ae: object exposing ``encoder.predict`` that turns point
                clouds into codes.
            gw: optional path of generator weights to load.
            dw: optional path of discriminator weights to load.
            pc_code_dim: length of the point-cloud code vector.
            batch_size: training batch size.
            color: request 'im_128' items if True, 'gray' items otherwise.
            gpus: number of GPUs; values > 1 wrap models in multi_gpu_model.
            norm: with category 'all', select 'all_exp_norm.json'.
            category: dataset category, used in split and weight file names.
        """
        self.noise_dim = 128
        self.ptcloud_ae = ptcloud_ae
        self.gw = gw
        self.dw = dw
        self.gpus = gpus
        self.pc_code_dim = pc_code_dim
        self.category = category
        self.model_dir = "saved_models"
        self.kernel_size = 3
        self.batch_size = batch_size
        self.generator = None
        self.discriminator = None
        self.adversarial = None
        os.makedirs(self.model_dir, exist_ok=True)
        os.makedirs("weights", exist_ok=True)
        self.color = color
        self.gen_spectral_normalization = False

        if color:
            # color images 128x128 rgb
            # (for big 224x224 rgb the item name would be 'im')
            items = ['im_128', 'pc', 'elev', 'azim']
        else:
            # grayscale images 224x224
            items = ['gray', 'pc', 'elev', 'azim']

        # Only the 'all' category has a normalized split file.
        if category == 'all' and norm:
            path = 'all_exp_norm.json'
        else:
            path = category + '_exp.json'
        self.split_file = os.path.join('data', path)

        self.train_source = DataSource(batch_size=self.batch_size,
                                       items=items,
                                       split_file=self.split_file)
        shapenet = self.train_source.dset
        self.epoch_datalen = len(
            shapenet.get_smids('train')) * shapenet.num_renders
        self.train_steps = self.epoch_datalen // self.batch_size

        path = self.category + "-" + str(pc_code_dim) + "-pc_codes.npy"
        self.pc_codes_filename = os.path.join("pc_codes", path)

        # 36 test samples: train_gan draws 36 noise vectors to match.
        self.test_source = DataSource(batch_size=36,
                                      smids='test',
                                      items=items,
                                      nepochs=20,
                                      split_file=self.split_file)

        self.build_gan()

    def generate_fake_pc_codes(self):
        """Encode training point clouds and save the codes as a .npy file.

        Runs ``4 * self.train_steps`` batches through the point-cloud encoder
        and stores the stacked codes at ``self.pc_codes_filename``.

        Raises:
            RuntimeError: if no batch was encoded (``train_steps`` is 0).
        """
        start_time = datetime.datetime.now()
        print("Generating fake pc codes...")

        # np.save does not create missing directories; make sure the target
        # exists before spending time on encoding.
        codes_dir = os.path.dirname(self.pc_codes_filename)
        if codes_dir:
            os.makedirs(codes_dir, exist_ok=True)

        steps = 4 * self.train_steps
        # Collect per-batch codes and concatenate once at the end; appending
        # to a growing array copies everything on every step.
        batches = []
        n_codes = 0
        for i in range(steps):
            _, fake_pc, _, _ = self.train_source.next_batch()
            # pc is [-0.5, 0.5]
            fake_pc = fake_pc / 0.5
            fake_pc_code = self.ptcloud_ae.encoder.predict(fake_pc)
            batches.append(fake_pc_code)
            n_codes += fake_pc_code.shape[0]

            elapsed_time = datetime.datetime.now() - start_time
            pcent = 100. * float(i) / steps
            # Shape the accumulated array would have so far.
            shape = (n_codes, ) + tuple(fake_pc_code.shape[1:])
            log = "%0.2f%% [shape: %s] [time: %s]" % (pcent, shape,
                                                      elapsed_time)
            print(log)

        if not batches:
            raise RuntimeError("no pc codes generated: train_steps is 0")

        fake_pc_codes = np.concatenate(batches, axis=0)
        print("Saving pc codes to file: ", self.pc_codes_filename)
        np.save(self.pc_codes_filename, fake_pc_codes)

    def _weights_path(self, role):
        """Return the weights file path for ``role`` ('gen' or 'dis')."""
        prefix = self.category + "-" + role
        prefix += "-color" if self.color else "-gray"
        if self.gen_spectral_normalization:
            prefix += "-sn"
        prefix += "-" + str(self.pc_code_dim)
        return os.path.join("weights", prefix + ".h5")

    def train_gan(self):
        """Alternate discriminator and adversarial updates for 120 epochs.

        Requires the file written by ``generate_fake_pc_codes``. Every 500
        steps (and at step 0) sample images are plotted from a fixed test
        batch and generator/discriminator weights are saved under 'weights'.
        """
        plot_interval = 500
        save_interval = 500
        start_time = datetime.datetime.now()

        # Fixed test batch and noise, reused for every periodic plot.
        test_image, pc, test_elev_code, test_azim_code = \
            self.test_source.next_batch()
        pc = pc / 0.5
        test_pc_code = self.ptcloud_ae.encoder.predict(pc)
        noise_ = np.random.uniform(-1.0, 1.0, size=[36, self.noise_dim])
        test_image -= 0.5
        test_image /= 0.5
        # x * 0.5 + 0.5; presumably maps view codes from [-1, 1] to [0, 1],
        # the range the fake codes below are sampled from -- TODO confirm.
        test_elev_code *= 0.5
        test_elev_code += 0.5
        test_azim_code *= 0.5
        test_azim_code += 0.5
        plot_image(test_image, color=self.color)

        # Discriminator batches are [real..., fake...], so labels follow suit.
        valid = np.ones([self.batch_size, 1])
        fake = np.zeros([self.batch_size, 1])
        valid_fake = np.concatenate((valid, fake))
        epochs = 120
        train_steps = self.train_steps * epochs

        fake_pc_codes = np.load(self.pc_codes_filename)
        fake_pc_codes_len = len(fake_pc_codes)
        print("Loaded pc codes", self.pc_codes_filename, " with len: ",
              fake_pc_codes_len)
        print("fake_pc_codes min: ", np.amin(fake_pc_codes),
              "fake_pc_codes max: ", np.amax(fake_pc_codes))
        print("test_pc_code min: ", np.amin(test_pc_code),
              " test_pc_code max: ", np.amax(test_pc_code))
        print("test_elev_code min: ", np.amin(test_elev_code),
              " test_elev_code max: ", np.amax(test_elev_code))
        print("test_azim_code min: ", np.amin(test_azim_code),
              " test_azim_code max: ", np.amax(test_azim_code))
        print("batch_size: ", self.batch_size, " pc_code_dim: ",
              self.pc_code_dim)
        print("Color images: ", self.color)

        for step in range(train_steps):
            # ---- discriminator update: real batch + generated batch ----
            real_image, real_pc, real_elev_code, real_azim_code = \
                self.train_source.next_batch()
            real_image -= 0.5
            real_image /= 0.5
            # pc is [-0.5, 0.5]
            real_pc = real_pc / 0.5
            real_pc_code = self.ptcloud_ae.encoder.predict(real_pc)
            rand_indexes = np.random.randint(0,
                                             fake_pc_codes_len,
                                             size=self.batch_size)
            fake_pc_code = fake_pc_codes[rand_indexes]
            pc_code = np.concatenate((real_pc_code, fake_pc_code))

            real_elev_code *= 0.5
            real_elev_code += 0.5
            fake_elev_code = np.random.uniform(0.0,
                                               1.0,
                                               size=[self.batch_size, 1])
            real_azim_code *= 0.5
            real_azim_code += 0.5
            fake_azim_code = np.random.uniform(0.0,
                                               1.0,
                                               size=[self.batch_size, 1])
            elev_code = np.concatenate((real_elev_code, fake_elev_code))
            azim_code = np.concatenate((real_azim_code, fake_azim_code))

            noise = np.random.uniform(-1.0,
                                      1.0,
                                      size=[self.batch_size, self.noise_dim])
            fake_image = self.generator.predict(
                [noise, fake_pc_code, fake_elev_code, fake_azim_code])
            x = np.concatenate((real_image, fake_image))
            metrics = self.discriminator.train_on_batch(
                x, [valid_fake, pc_code, elev_code, azim_code])
            pcent = step * 100.0 / train_steps
            fmt = "%02.4f%%/%06d:[loss:%02.6f d:%02.6f pc:%02.6f elev:%02.6f azim:%02.6f]"
            log = fmt % (pcent, step, metrics[0], metrics[1], metrics[2],
                         metrics[3], metrics[4])

            # ---- adversarial update: fresh fakes labelled as valid ----
            rand_indexes = np.random.randint(0,
                                             fake_pc_codes_len,
                                             size=self.batch_size)
            fake_pc_code = fake_pc_codes[rand_indexes]
            fake_elev_code = np.random.uniform(0.0,
                                               1.0,
                                               size=[self.batch_size, 1])
            fake_azim_code = np.random.uniform(0.0,
                                               1.0,
                                               size=[self.batch_size, 1])
            noise = np.random.uniform(-1.0,
                                      1.0,
                                      size=[self.batch_size, self.noise_dim])
            metrics = self.adversarial.train_on_batch(
                [noise, fake_pc_code, fake_elev_code, fake_azim_code],
                [valid, fake_pc_code, fake_elev_code, fake_azim_code])
            fmt = "%s [loss:%02.6f a:%02.6f pc:%02.6f elev:%02.6f azim:%02.6f]"
            log = fmt % (log, metrics[0], metrics[1], metrics[2], metrics[3],
                         metrics[4])

            elapsed_time = datetime.datetime.now() - start_time
            log = "%s [time: %s]" % (log, elapsed_time)
            print(log)

            if (step + 1) % plot_interval == 0 or step == 0:
                # plot generator images on a periodic basis
                plot_images(self.generator,
                            noise=noise_,
                            pc_code=test_pc_code,
                            elev_code=test_elev_code,
                            azim_code=test_azim_code,
                            color=self.color,
                            show=False,
                            step=(step + 1))

            if (step + 1) % save_interval == 0 or step == 0:
                # save weights on a periodic basis; always from the
                # single-device models, not the multi-GPU wrappers
                self.generator_single.save_weights(self._weights_path("gen"))
                self.discriminator_single.save_weights(
                    self._weights_path("dis"))

    def azim_loss(self, y_true, y_pred):
        """Mean absolute angular error, code difference scaled by 2*pi."""
        rad = 2. * np.pi
        rad *= (y_true - y_pred)
        # atan2(sin, cos) wraps the difference into (-pi, pi].
        return K.mean(K.abs(tf.atan2(K.sin(rad), K.cos(rad))), axis=-1)

    def elev_loss(self, y_true, y_pred):
        """Mean absolute angular error for the elevation code."""
        # rad = 2. * np.pi * 80. / 360.
        rad = 0.4444444444444444 * np.pi
        rad *= (y_true - y_pred)
        return K.mean(K.abs(tf.atan2(K.sin(rad), K.cos(rad))), axis=-1)

    def _build_replicated(self, build_fn, weights=None, label=""):
        """Build a model and, for gpus > 1, its multi-GPU wrapper.

        Args:
            build_fn: zero-argument callable returning the model.
            weights: optional weights file loaded into the built model.
            label: model name used in the "loading ... weights" message.

        Returns:
            ``(single, parallel)``; both are the same object when gpus <= 1.
        """
        if self.gpus <= 1:
            single = build_fn()
            if weights is not None:
                print("loading " + label + " weights: ", weights)
                single.load_weights(weights)
            return single, single

        # Template model is built on the CPU, then replicated across GPUs.
        with tf.device("/cpu:0"):
            single = build_fn()
            if weights is not None:
                print("loading " + label + " weights: ", weights)
                single.load_weights(weights)
        return single, multi_gpu_model(single, gpus=self.gpus)

    def build_gan(self):
        """Create and compile the discriminator, generator and adversarial."""
        # One batch is drawn only to learn the image size.
        image, _, _, _ = self.train_source.next_batch()
        elev_code = Input(shape=(1, ), name='elev_code')
        azim_code = Input(shape=(1, ), name='azim_code')
        pc_code = Input(shape=(self.pc_code_dim, ), name='pc_code')
        noise_code = Input(shape=(self.noise_dim, ), name='noise_code')
        model_name = "pc2pix"
        image_size = image.shape[1]
        channels = 3 if self.color else 1
        input_shape = (image_size, image_size, channels)
        # NOTE(review): this Input is not wired into any model below; the call
        # is kept so graph construction is unchanged.
        Input(shape=input_shape, name='image_input')

        if self.gen_spectral_normalization:
            optimizer = Adam(lr=4e-4, beta_1=0.0, beta_2=0.9)
        else:
            optimizer = Adam(lr=2e-4, beta_1=0.5, beta_2=0.999)

        # build discriminator
        # by default, discriminator uses SN
        self.discriminator_single, self.discriminator = \
            self._build_replicated(
                lambda: model.discriminator(input_shape,
                                            pc_code_dim=self.pc_code_dim),
                weights=self.dw,
                label="discriminator")

        # Outputs: real/fake, pc code, elevation code, azimuth code.
        loss = ['binary_crossentropy', 'mae', self.elev_loss, self.azim_loss]
        loss_weights = [1., 10., 10., 10.]
        self.discriminator.compile(loss=loss,
                                   loss_weights=loss_weights,
                                   optimizer=optimizer)
        self.discriminator_single.summary()
        path = os.path.join(self.model_dir, "discriminator.png")
        plot_model(self.discriminator_single, to_file=path, show_shapes=True)

        # build generator
        # try SN to see if mode collapse is avoided
        self.generator_single, self.generator = self._build_replicated(
            lambda: model.generator(
                input_shape,
                noise_code=noise_code,
                pc_code=pc_code,
                elev_code=elev_code,
                azim_code=azim_code,
                spectral_normalization=self.gen_spectral_normalization,
                color=self.color),
            weights=self.gw,
            label="generator")
        self.generator_single.summary()
        path = os.path.join(self.model_dir, "generator.png")
        plot_model(self.generator_single, to_file=path, show_shapes=True)

        # Freeze the discriminator inside the stacked adversarial model.
        self.discriminator.trainable = False
        if self.gen_spectral_normalization:
            optimizer = Adam(lr=1e-4, beta_1=0.0, beta_2=0.9)
        else:
            optimizer = Adam(lr=1e-4, beta_1=0.5, beta_2=0.999)

        def build_adversarial():
            gan_inputs = [noise_code, pc_code, elev_code, azim_code]
            return Model(gan_inputs,
                         self.discriminator(self.generator(gan_inputs)),
                         name=model_name)

        self.adversarial_single, self.adversarial = self._build_replicated(
            build_adversarial)
        self.adversarial.compile(loss=loss,
                                 loss_weights=loss_weights,
                                 optimizer=optimizer)
        self.adversarial_single.summary()
        path = os.path.join(self.model_dir, "adversarial.png")
        plot_model(self.adversarial_single, to_file=path, show_shapes=True)

        print("Using split file: ", self.split_file)
        print("1 epoch datalen: ", self.epoch_datalen)
        print("1 epoch train steps: ", self.train_steps)
        print("Using pc codes: ", self.pc_codes_filename)

    def stop_sources(self):
        """Close the train and test data sources that were actually opened."""
        # __init__ may fail before either source exists, and __del__ still
        # runs on the partially built object, so missing sources are skipped.
        for attr in ("train_source", "test_source"):
            source = getattr(self, attr, None)
            if source is not None:
                source.close()

    def __del__(self):
        self.stop_sources()
class PtCloudStackedAE():
    """Point-cloud autoencoder: dilated Conv1D encoder plus MLP decoder.

    ``build_ae`` wires ``self.encoder`` and ``self.decoder`` into ``self.ae``
    and, unless ``evaluate`` is set, compiles it with ``self.loss``.
    """

    def __init__(self,
                 latent_dim=32,
                 kernel_size=5,
                 lr=1e-4,
                 category="all",
                 evaluate=False,
                 emd=True):
        """Open the data sources and build the autoencoder.

        Args:
            latent_dim: size of the encoder output vector.
            kernel_size: Conv1D kernel size used by encoder/decoder layers.
            lr: RMSprop learning rate.
            category: dataset category, used in split and weight file names.
            evaluate: if True the autoencoder is built but not compiled.
            emd: select the EMD branch of ``loss``; otherwise Chamfer.
        """
        self.latent_dim = latent_dim
        self.lr = lr
        self.batch_size = 32
        self.evaluate = evaluate
        self.emd = emd
        self.inputs = None
        self.encoder = None
        self.decoder = None
        self.ae = None
        self.z_log_var = None
        self.z_mean = None
        self.z = None
        self.kernel_size = kernel_size
        self.model_dir = "saved_models"
        os.makedirs(self.model_dir, exist_ok=True)
        self.category = category

        if category == 'all':
            path = 'all_exp_norm.json'
        else:
            path = category + '_exp.json'
        split_file = os.path.join('data', path)
        print("Using train split file: ", split_file)

        self.train_source = DataSource(batch_size=self.batch_size,
                                       split_file=split_file)
        self.test_source = DataSource(batch_size=self.batch_size,
                                      smids='test',
                                      nepochs=20,
                                      split_file=split_file)
        shapenet = self.train_source.dset
        self.epoch_datalen = len(
            shapenet.get_smids('train')) * shapenet.num_renders
        self.train_steps = self.epoch_datalen // self.batch_size

        # One batch is drawn only to learn the per-sample input shape.
        _, pc = self.train_source.next_batch()
        self.input_shape = pc[0].shape
        self.build_ae()

    def encoder_layer(self, x, filters, strides=1, dilation_rate=1):
        """Apply BatchNormalization -> ReLU -> Conv1D ('same' padding)."""
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = Conv1D(filters=filters,
                   kernel_size=self.kernel_size,
                   strides=strides,
                   dilation_rate=dilation_rate,
                   padding='same')(x)
        return x

    def compression_layer(self, x, y, maxpool=True):
        """Concatenate ``y`` onto ``x`` and derive the next ``y``.

        Args:
            x: main feature tensor.
            y: side tensor; max-pooled first when ``maxpool`` is True.
            maxpool: whether to apply MaxPooling1D to ``y``.

        Returns:
            ``(x, y)``: the concatenation, and a 64-filter 1x1 Conv1D of it.
        """
        if maxpool:
            y = MaxPooling1D()(y)
        x = concatenate([x, y])
        y = Conv1D(filters=64,
                   kernel_size=1,
                   activation='relu',
                   padding='same')(x)
        return x, y

    def build_encoder(self, filters=64, activation='linear'):
        """Build ``self.encoder``.

        Four stages of parallel dilated convolutions (rates 1, 2, 4, 8) are
        each followed by a compression layer; stages after the first are
        preceded by a stride-2 convolution.

        Returns:
            ``(shape, filters)``: the pre-Flatten tensor shape and ``filters``.
        """
        self.inputs = Input(shape=self.input_shape, name='encoder_input')
        x = self.inputs
        y = self.inputs

        for stage in range(4):
            downsampled = stage > 0
            if downsampled:
                x = self.encoder_layer(x, 128, strides=2, dilation_rate=1)
            branches = [
                self.encoder_layer(x, filters, strides=1, dilation_rate=rate)
                for rate in (1, 2, 4, 8)
            ]
            x = concatenate(branches)
            # y is pooled only where x was strided by the same factor.
            x, y = self.compression_layer(x, y, maxpool=downsampled)

        x = self.encoder_layer(x, 32)
        shape = K.int_shape(x)

        x = Flatten()(x)
        # experimental tanh activation, revert to none or linear if needed
        outputs = Dense(self.latent_dim,
                        activation=activation,
                        name='ae_encoder_out')(x)
        path = os.path.join(self.model_dir, "ae_encoder.png")
        self.encoder = Model(self.inputs, outputs, name='ae_encoder')
        self.encoder.summary()
        plot_model(self.encoder, to_file=path, show_shapes=True)

        return shape, filters

    def build_decoder_mlp(self, dim=1024):
        """Build ``self.decoder`` as three ReLU Dense layers plus tanh output.

        The output is reshaped to ``self.input_shape``. Returns None.
        """
        # build decoder model
        latent_inputs = Input(shape=(self.latent_dim, ), name='decoder_input')
        x = latent_inputs
        for _ in range(3):
            x = Dense(dim, activation='relu')(x)
        x = Dense(np.prod(self.input_shape), activation='tanh')(x)
        outputs = Reshape(self.input_shape)(x)

        path = os.path.join(self.model_dir, "decoder_mlp.png")
        # instantiate decoder model
        self.decoder = Model(latent_inputs, outputs, name='decoder')
        self.decoder.summary()
        plot_model(self.decoder, to_file=path, show_shapes=True)

    def build_decoder(self, filters, shape):
        """Build ``self.decoder`` as a Conv1D/UpSampling1D stack.

        Args:
            filters: filter count of the first block, halved after each block.
            shape: encoder pre-Flatten shape; ``shape[1]`` and ``shape[2]``
                give the tensor the latent vector is reshaped to.
        """
        # build decoder model
        latent_inputs = Input(shape=(self.latent_dim, ), name='decoder_input')
        pt_cloud_shape = (shape[1], shape[2])
        dim = shape[1] * shape[2]
        x = Dense(128, activation='relu')(latent_inputs)
        x = Dense(dim, activation='relu')(x)
        x = Reshape(pt_cloud_shape)(x)

        for _ in range(4):
            x = BatchNormalization()(x)
            x = Activation('relu')(x)
            x = Conv1D(filters=filters,
                       kernel_size=self.kernel_size,
                       padding='same')(x)
            x = UpSampling1D()(x)
            filters //= 2

        outputs = Conv1D(filters=3,
                         kernel_size=self.kernel_size,
                         activation='tanh',
                         padding='same',
                         name='decoder_output')(x)

        path = os.path.join(self.model_dir, "decoder.png")
        # instantiate decoder model
        self.decoder = Model(latent_inputs, outputs, name='decoder')
        self.decoder.summary()
        plot_model(self.decoder, to_file=path, show_shapes=True)

    def loss(self, gt, pred):
        """Reconstruction loss between ``gt`` and ``pred`` point clouds.

        With ``self.emd`` set, ``pred`` is matched against ``gt`` via
        ``auction_match``/``gather_point`` and the per-sample mean squared
        difference is returned; otherwise the sum of the two mean
        nearest-neighbour distances (Chamfer) is returned.
        """
        # Custom-op modules are imported lazily, and only for the branch in
        # use, so one loss does not require the other's ops to be importable.
        if self.emd:
            from tf_ops.emd import tf_auctionmatch
            from tf_ops.sampling import tf_sampling

            matchl_out, _ = tf_auctionmatch.auction_match(pred, gt)
            matched_out = tf_sampling.gather_point(gt, matchl_out)
            emd_loss = tf.reshape((pred - matched_out)**2,
                                  shape=(self.batch_size, -1))
            return tf.reduce_mean(emd_loss, axis=1, keepdims=True)

        from structural_losses import tf_nndistance

        p1top2, _, p2top1, _ = tf_nndistance.nn_distance(pred, gt)
        return K.mean(p1top2) + K.mean(p2top1)

    def build_ae(self):
        """Build encoder and MLP decoder, then chain them into ``self.ae``."""
        self.build_encoder()
        self.build_decoder_mlp()
        outputs = self.decoder(self.encoder(self.inputs))
        self.ae = Model(self.inputs, outputs, name='ae')
        self.ae.summary()

        optimizer = RMSprop(lr=self.lr)
        # Evaluation-only instances skip compile.
        if not self.evaluate:
            self.ae.compile(optimizer=optimizer, loss=self.loss)

        path = os.path.join(self.model_dir, "ae.png")
        plot_model(self.ae, to_file=path, show_shapes=True)
        print("Learning rate: ", self.lr)

    def train_ae(self):
        """Train the autoencoder for 30 epochs.

        The mean loss is printed every 100 steps and encoder, decoder and
        autoencoder weights are saved every 500 steps.
        """
        save_interval = 500
        print_interval = 100
        start_time = datetime.datetime.now()
        loss = 0.0
        epochs = 30
        train_steps = self.train_steps * epochs

        # The weight-file prefix does not change during training.
        prefix = self.category + "-" + "pt-cloud-stacked-ae"
        prefix += "-emd" if self.emd else "-chamfer"
        prefix += "-" + str(self.kernel_size)
        weights_dir = "weights"

        for step in range(train_steps):
            _, pc = self.train_source.next_batch()
            pc = pc / 0.5
            metrics = self.ae.train_on_batch(x=pc, y=pc)
            loss += metrics

            if (step + 1) % print_interval == 0:
                elapsed_time = datetime.datetime.now() - start_time
                loss /= print_interval
                pcent = step * 100.0 / train_steps
                fmt = "%02.4f%%/%06d:[loss:%02.6f time:%s]"
                log = fmt % (pcent, step + 1, loss, elapsed_time)
                print(log)
                loss = 0.0

            if (step + 1) % save_interval == 0:
                # This class never creates the directory elsewhere.
                os.makedirs(weights_dir, exist_ok=True)
                save_weights(self.encoder,
                             "encoder",
                             weights_dir,
                             self.latent_dim,
                             prefix=prefix)
                save_weights(self.decoder,
                             "decoder",
                             weights_dir,
                             self.latent_dim,
                             prefix=prefix)
                save_weights(self.ae,
                             "ae",
                             weights_dir,
                             self.latent_dim,
                             prefix=prefix)

    def stop_sources(self):
        """Close the train and test data sources that were actually opened."""
        # __init__ may fail before either source exists, and __del__ still
        # runs on the partially built object, so missing sources are skipped.
        for attr in ("train_source", "test_source"):
            source = getattr(self, attr, None)
            if source is not None:
                source.close()

    def __del__(self):
        self.stop_sources()