def _worker(self, root_dir, parameters, device_queue): # sleep for random seconds to avoid crowded launching try: time.sleep(random.uniform(0, 3)) device = device_queue.get() if self._conf.use_gpu: os.environ["CUDA_VISIBLE_DEVICES"] = str(device) else: os.environ["CUDA_VISIBLE_DEVICES"] = "" # run on cpu from alf.utils.common import set_per_process_memory_growth set_per_process_memory_growth() logging.set_verbosity(logging.INFO) logging.info("parameters %s" % parameters) with gin.unlock_config(): gin.parse_config( ['%s=%s' % (k, v) for k, v in parameters.items()]) train_eval(root_dir) device_queue.put(device) except Exception as e: logging.info(e) raise e
z, kl_loss = encoder.sampling_forward(inputs) outputs = decoding_layers(z) loss = tf.reduce_mean(100 * loss_f(inputs - outputs) + kl_loss) model = tf.keras.Model(inputs, outputs, name="vae") model.add_loss(loss) model.compile(optimizer=tf.optimizers.Adam(learning_rate=0.1)) model.summary() x_train = np.random.randn(10000, 1) x_val = np.random.randn(10000, 1) x_test = np.random.randn(10, 1) y_test = model(x_test.astype(np.float32)) hist = model.fit(x_train, epochs=epochs, batch_size=batch_size, validation_data=(x_val, None)) y_test = model(x_test.astype(np.float32)) reconstruction_loss = float(tf.reduce_mean(loss_f(x_test - y_test))) print("reconstruction_loss:", reconstruction_loss) self.assertLess(reconstruction_loss, 0.05) if __name__ == '__main__': from alf.utils.common import set_per_process_memory_growth set_per_process_memory_growth() tf.test.main()