# Common imports assumed by the snippets below (TF 1.x, `tf.contrib.eager`
# era); model and helper modules such as densenet, resnet50, mnist, l2hmc,
# and tfp come from each snippet's own example package.
import gc
import os
import time

import numpy as np
import tensorflow as tf
import tensorflow.contrib.eager as tfe
from six.moves import xrange


def _benchmark_eager_apply(self, label, device_and_format, defun=False,
                           execution_mode=None, compiled=False):
  with tfe.execution_mode(execution_mode):
    device, data_format = device_and_format
    model = densenet.DenseNet(self.depth, self.growth_rate, self.num_blocks,
                              self.output_classes,
                              self.num_layers_in_each_block, data_format,
                              bottleneck=True, compression=0.5,
                              weight_decay=1e-4, dropout_rate=0,
                              pool_initial=True, include_top=True)
    if defun:
      model.call = tfe.defun(model.call, compiled=compiled)
    batch_size = 64
    num_burn = 5
    num_iters = 30
    with tf.device(device):
      images, _ = random_batch(batch_size, data_format)
      # Burn-in: run a few untimed iterations so one-time costs (tracing,
      # allocation) do not skew the measurement. .cpu() copies the result to
      # host memory, forcing the computation to complete.
      for _ in xrange(num_burn):
        model(images, training=False).cpu()
      if execution_mode:
        tfe.async_wait()
      gc.collect()
      start = time.time()
      for _ in xrange(num_iters):
        model(images, training=False).cpu()
      if execution_mode:
        tfe.async_wait()
      self._report(label, start, num_iters, device, batch_size, data_format)

def evaluate(defun=False):
  model = mnist.create_model(data_format())
  dataset = random_dataset()
  if defun:
    model.call = tfe.defun(model.call)
  with tf.device(device()):
    mnist_eager.test(model, dataset)

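# A minimal, self-contained sketch of the tfe.defun pattern all of these
# snippets use (assumes TF 1.x; `square_plus_one` is a toy function, not part
# of the source examples). On its first call the wrapped function is traced
# into a graph function, and subsequent calls execute the cached graph.
import tensorflow as tf
import tensorflow.contrib.eager as tfe

tf.enable_eager_execution()

def square_plus_one(x):
  return tf.square(x) + 1.0

square_plus_one = tfe.defun(square_plus_one)  # traced on first call
print(square_plus_one(tf.constant(2.0)))  # => tf.Tensor(5.0, ...)
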
def _benchmark_eager_train(self, label, make_iterator, defun=False):
  device, data_format = device_and_data_format()
  for batch_size in self._train_batch_sizes():
    (images, labels) = random_batch(batch_size)
    num_burn = 3
    num_iters = 10
    model = resnet50.ResNet50(data_format)
    if defun:
      model.call = tfe.defun(model.call)
    optimizer = tf.train.GradientDescentOptimizer(0.1)
    with tf.device(device):
      iterator = make_iterator((images, labels))
      for _ in xrange(num_burn):
        (images, labels) = iterator.next()
        train_one_step(model, images, labels, optimizer)
      self._force_gpu_sync()
      gc.collect()
      start = time.time()
      for _ in xrange(num_iters):
        (images, labels) = iterator.next()
        train_one_step(model, images, labels, optimizer)
      self._force_gpu_sync()
      self._report(label, start, num_iters, device, batch_size, data_format)

def train(defun=False):
  model = mnist.Model(data_format())
  if defun:
    model.call = tfe.defun(model.call)
  optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
  dataset = random_dataset()
  with tf.device(device()):
    mnist_eager.train(model, optimizer, dataset)

def _benchmark_eager_train(self, label, make_iterator, device_and_format,
                           defun=False, execution_mode=None, compiled=False):
  with tfe.execution_mode(execution_mode):
    device, data_format = device_and_format
    for batch_size in self._train_batch_sizes():
      (images, labels) = random_batch(batch_size, data_format)
      model = densenet.DenseNet(self.depth, self.growth_rate, self.num_blocks,
                                self.output_classes,
                                self.num_layers_in_each_block, data_format,
                                bottleneck=True, compression=0.5,
                                weight_decay=1e-4, dropout_rate=0,
                                pool_initial=True, include_top=True)
      optimizer = tf.train.GradientDescentOptimizer(0.1)
      apply_grads = apply_gradients
      if defun:
        model.call = tfe.defun(model.call, compiled=compiled)
        apply_grads = tfe.defun(apply_gradients, compiled=compiled)
      num_burn = 3
      num_iters = 10
      with tf.device(device):
        iterator = make_iterator((images, labels))
        for _ in xrange(num_burn):
          (images, labels) = iterator.next()
          apply_grads(model, optimizer,
                      compute_gradients(model, images, labels))
        if execution_mode:
          tfe.async_wait()
        self._force_device_sync()
        gc.collect()
        start = time.time()
        for _ in xrange(num_iters):
          (images, labels) = iterator.next()
          apply_grads(model, optimizer,
                      compute_gradients(model, images, labels))
        if execution_mode:
          tfe.async_wait()
        self._force_device_sync()
        self._report(label, start, num_iters, device, batch_size, data_format)

def train(defun=False):
  model = mnist.create_model(data_format())
  if defun:
    model.call = tfe.defun(model.call)
  optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
  dataset = random_dataset()
  with tf.device(device()):
    mnist_eager.train(model, optimizer, dataset,
                      step_counter=tf.train.get_or_create_global_step())

def _apply(self, defun=False):
  device, data_format = device_and_data_format()
  model = resnet50.ResNet50(data_format)
  if defun:
    model.call = tfe.defun(model.call)
  with tf.device(device):
    images, _ = random_batch(2)
    output = model(images)
    self.assertEqual((2, 1000), output.shape)

def _apply(self, defun=False, execution_mode=None):
  device, data_format = device_and_data_format()
  model = resnet50.ResNet50(data_format)
  if defun:
    model.call = tfe.defun(model.call)
  with tf.device(device), tfe.execution_mode(execution_mode):
    images, _ = random_batch(2, data_format)
    output = model(images, training=False)
    tfe.async_wait()
  self.assertEqual((2, 1000), output.shape)

def train(defun=False):
  model = mnist.Model(data_format())
  if defun:
    model.call = tfe.defun(model.call)
  optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
  dataset = random_dataset()
  with tf.device(device()):
    mnist_eager.train(model, optimizer, dataset,
                      step_counter=tf.train.get_or_create_global_step())

def _benchmark_eager_train(self, label, make_iterator, device_and_format,
                           defun=False, execution_mode=None, compiled=False):
  with tfe.execution_mode(execution_mode):
    device, data_format = device_and_format
    for batch_size in self._train_batch_sizes():
      (images, labels) = random_batch(batch_size, data_format)
      model = resnet50.ResNet50(data_format)
      optimizer = tf.train.GradientDescentOptimizer(0.1)
      apply_grads = apply_gradients
      if defun:
        model.call = tfe.defun(model.call, compiled=compiled)
        apply_grads = tfe.defun(apply_gradients, compiled=compiled)
      num_burn = 3
      num_iters = 10
      with tf.device(device):
        iterator = make_iterator((images, labels))
        for _ in xrange(num_burn):
          (images, labels) = iterator.next()
          apply_grads(model, optimizer,
                      compute_gradients(model, images, labels))
        if execution_mode:
          tfe.async_wait()
        self._force_device_sync()
        gc.collect()
        start = time.time()
        for _ in xrange(num_iters):
          (images, labels) = iterator.next()
          apply_grads(model, optimizer,
                      compute_gradients(model, images, labels))
        if execution_mode:
          tfe.async_wait()
        self._force_device_sync()
        self._report(label, start, num_iters, device, batch_size, data_format)

def __init__(self,
             generator: Callable[[], k.models.Model],
             discriminator: Callable[[], k.models.Model],
             hyper: Dict) -> None:
    # `generator` and `discriminator` are zero-argument factories that build
    # the models (they are called below), hence the Callable annotations.
    learning_rate = hyper["learning_rate"]
    beta1 = hyper["beta1"]
    self.generator_optimizer = tf.train.AdamOptimizer(learning_rate, beta1)
    self.discriminator_optimizer = tf.train.AdamOptimizer(learning_rate, beta1)

    self.generator = generator()
    self.generator.call = tfe.defun(self.generator.call)
    self.discriminator = discriminator()
    self.discriminator.call = tfe.defun(self.discriminator.call)

    model_dir = "./logs"
    self.checkpoint_prefix = os.path.join(model_dir, "ckpt")
    self.checkpoint = tf.train.Checkpoint(
        generator_optimizer=self.generator_optimizer,
        discriminator_optimizer=self.discriminator_optimizer,
        generator=self.generator,
        discriminator=self.discriminator,
    )
    self.summary_writer = tf.contrib.summary.create_file_writer(
        model_dir, flush_millis=10000)

def _benchmark_eager_apply(self, label, defun=False):
  device, data_format = device_and_data_format()
  model = resnet50.ResNet50(data_format)
  if defun:
    model.call = tfe.defun(model.call)
  batch_size = 64
  num_burn = 5
  num_iters = 30
  with tf.device(device):
    images, _ = random_batch(batch_size)
    for _ in xrange(num_burn):
      model(images).cpu()
    gc.collect()
    start = time.time()
    for _ in xrange(num_iters):
      model(images).cpu()
    self._report(label, start, num_iters, device, batch_size, data_format)

def _benchmark_eager(self, defun=False):
  """Benchmark Eager performance."""
  hparams = get_default_hparams()
  for sample_size in [10, 25, 50, 100, 200]:
    hparams.n_samples = sample_size
    energy_fn, _, _ = l2hmc.get_scg_energy_fn()
    dynamics = l2hmc.Dynamics(
        x_dim=hparams.x_dim,
        minus_loglikelihood_fn=energy_fn,
        n_steps=hparams.n_steps,
        eps=hparams.eps)
    optimizer = tf.train.AdamOptimizer(learning_rate=hparams.learning_rate)
    step_fn = tfe.defun(step) if defun else step

    # Warmup to reduce initialization effect when timing
    warmup(
        dynamics,
        optimizer,
        n_iters=hparams.n_warmup_iters,
        n_samples=hparams.n_samples,
        step_fn=step_fn)

    # Training
    samples = tf.random_normal(
        shape=[hparams.n_samples, hparams.x_dim], dtype=tf.float32)
    start_time = time.time()
    fit(dynamics, samples, optimizer, step_fn=step_fn, n_iters=hparams.n_iters)
    wall_time = (time.time() - start_time) / hparams.n_iters
    examples_per_sec = hparams.n_samples / wall_time

    self.report_benchmark(
        name="eager_train_%s%s_%d" %
        ("gpu" if tf.test.is_gpu_available() else "cpu",
         "_defun" if defun else "", sample_size),
        iters=hparams.n_iters,
        extras={"examples_per_sec": examples_per_sec},
        wall_time=wall_time)

    del dynamics

def _benchmark_eager(self, defun=False):
  """Benchmark Eager performance."""
  hparams = get_default_hparams()
  energy_fn = self._get_energy_fn()
  dynamics = l2hmc.Dynamics(
      x_dim=hparams.x_dim,
      loglikelihood_fn=energy_fn,
      n_steps=hparams.n_steps,
      eps=hparams.eps)
  optimizer = tf.train.AdamOptimizer(learning_rate=hparams.learning_rate)
  loss_fn = tfe.defun(compute_loss) if defun else compute_loss

  # Warmup to reduce initialization effect when timing
  warmup(dynamics, optimizer, n_iters=hparams.n_warmup_iters, loss_fn=loss_fn)

  # Training
  samples = tf.random_normal(
      shape=[hparams.n_samples, hparams.x_dim], dtype=tf.float32)
  start_time = time.time()
  fit(dynamics, samples, optimizer, loss_fn=loss_fn,
      n_iters=hparams.n_iters, decay_lr=True)
  wall_time = time.time() - start_time
  examples_per_sec = hparams.n_samples / wall_time

  self.report_benchmark(
      name="eager_train_%s%s" %
      ("gpu" if tf.test.is_gpu_available() else "cpu",
       "_defun" if defun else ""),
      iters=hparams.n_iters,
      extras={"examples_per_sec": examples_per_sec},
      wall_time=wall_time)

  del dynamics
  del loss_fn

def _benchmark_eager_apply(self, label, device_and_format, defun=False,
                           execution_mode=None, compiled=False):
  with tfe.execution_mode(execution_mode):
    device, data_format = device_and_format
    model = resnet50.ResNet50(data_format)
    if defun:
      model.call = tfe.defun(model.call, compiled=compiled)
    batch_size = 64
    num_burn = 5
    num_iters = 30
    with tf.device(device):
      images, _ = random_batch(batch_size, data_format)
      for _ in xrange(num_burn):
        model(images, training=False).cpu()
      if execution_mode:
        tfe.async_wait()
      gc.collect()
      start = time.time()
      for _ in xrange(num_iters):
        model(images, training=False).cpu()
      if execution_mode:
        tfe.async_wait()
      self._report(label, start, num_iters, device, batch_size, data_format)

def __init__(self, policy):
    super().__init__()
    self.policy_type = policy['type']
    self.policy_name = policy['name']

    if self.policy_type == 'gcnn':
        model = policy['model']
        model.restore_state(policy['parameters'])
        self.policy = tfe.defun(model.call,
                                input_signature=model.input_signature)
    elif self.policy_type == 'internal':
        self.policy = policy['name']
    elif self.policy_type == 'ml-competitor':
        self.policy = policy['model']

        # feature parameterization
        self.feat_shift = policy['feat_shift']
        self.feat_scale = policy['feat_scale']
        self.feat_specs = policy['feat_specs']
    else:
        raise NotImplementedError

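# A sketch of the `input_signature` variant used in the policy wrapper above
# (assumes a TF version where tf.TensorSpec is available, >= 1.13; the
# `standardize` function and [None, 3] feature shape are illustrative
# assumptions). Pinning a signature avoids retracing when batch sizes vary.
signature = [tf.TensorSpec(shape=[None, 3], dtype=tf.float32)]

def standardize(feats):
    # Zero-mean, unit-variance features along the batch dimension.
    mean = tf.reduce_mean(feats, axis=0)
    var = tf.reduce_mean(tf.square(feats - mean), axis=0)
    return (feats - mean) / (tf.sqrt(var) + 1e-8)

standardize = tfe.defun(standardize, input_signature=signature)
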
def benchmark_eight_schools_hmc(num_results=int(5e3),
                                num_burnin_steps=int(3e3),
                                num_leapfrog_steps=3,
                                step_size=0.4):
  """Runs HMC on the eight-schools unnormalized posterior."""
  num_schools = 8
  treatment_effects = tf.constant(
      [28, 8, -3, 7, -1, 1, 18, 12],
      dtype=np.float32,
      name='treatment_effects')
  treatment_stddevs = tf.constant(
      [15, 10, 16, 11, 9, 11, 10, 18],
      dtype=np.float32,
      name='treatment_stddevs')

  def unnormalized_posterior_log_prob(avg_effect, avg_stddev,
                                      school_effects_standard):
    """Eight-schools unnormalized log posterior."""
    return eight_schools_joint_log_prob(treatment_effects, treatment_stddevs,
                                        avg_effect, avg_stddev,
                                        school_effects_standard)

  sample_chain = tfe.defun(tfp.mcmc.sample_chain)
  executing_eagerly = tf.executing_eagerly()

  def computation():
    """The benchmark computation."""
    _, kernel_results = sample_chain(
        num_results=num_results,
        num_burnin_steps=num_burnin_steps,
        current_state=(
            tf.zeros([], name='init_avg_effect'),
            tf.zeros([], name='init_avg_stddev'),
            tf.ones([num_schools], name='init_school_effects_standard'),
        ),
        kernel=tfp.mcmc.HamiltonianMonteCarlo(
            target_log_prob_fn=unnormalized_posterior_log_prob,
            step_size=step_size,
            num_leapfrog_steps=num_leapfrog_steps))
    return kernel_results.is_accepted

  # warm-up
  is_accepted_tensor = computation()
  if not executing_eagerly:
    session = tf.Session()
    session.run(is_accepted_tensor)

  start_time = time.time()
  if executing_eagerly:
    is_accepted = computation()
  else:
    is_accepted = session.run(is_accepted_tensor)
  wall_time = time.time() - start_time

  num_accepted = np.sum(is_accepted)
  acceptance_rate = np.float32(num_accepted) / np.float32(num_results)
  return dict(
      iters=(num_results + num_burnin_steps) * num_leapfrog_steps,
      extras={'acceptance_rate': acceptance_rate},
      wall_time=wall_time)

    break

image = image.numpy().reshape((28, 28))
plt.figure()
plt.imshow(image, cmap=plt.cm.binary)
plt.colorbar()
plt.grid(False)
plt.show()

model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28, 1)),
    tf.keras.layers.Dense(128, activation=tf.nn.relu),
    tf.keras.layers.Dense(10, activation=tf.nn.softmax)
])
model.call = tfe.defun(model.call)

def loss(model, inputs, labels):
    logits = model(inputs)
    return tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=labels)

def grad(model, inputs, targets):
    with tf.GradientTape() as tape:
        loss_value = loss(model, inputs, targets)
    return loss_value, tape.gradient(loss_value, model.trainable_weights)

def accuracy(logits, labels):
    # Body assumed: the source snippet is truncated after this signature.
    # Standard top-1 accuracy: fraction of argmax predictions matching labels.
    predictions = tf.argmax(logits, axis=1)
    return tf.reduce_mean(
        tf.cast(tf.equal(predictions, tf.cast(labels, predictions.dtype)),
                tf.float32))

def defun_neural_ode(node: NeuralODE) -> NeuralODE:
    """Compiles a NeuralODE's methods in place into graph functions."""
    node.forward = tfe.defun(node.forward)
    node.backward = tfe.defun(node.backward)
    node.forward_odeint = tfe.defun(node.forward_odeint)
    return node

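# A runnable sketch of the same in-place method wrapping defun_neural_ode
# performs, on a toy class (`Scaler` is a stand-in, not part of the source;
# assumes eager execution is enabled). tfe.defun accepts bound methods, so
# an object's methods can be swapped for traced graph functions without
# changing any call sites.
class Scaler(object):
    def __init__(self, factor):
        self.factor = tf.constant(factor)

    def forward(self, x):
        return x * self.factor

scaler = Scaler(3.0)
scaler.forward = tfe.defun(scaler.forward)  # traced on first call
print(scaler.forward(tf.constant([1.0, 2.0])))  # => [3. 6.]
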
### TRAINING LOOP ###
optimizer = tf.train.AdamOptimizer(learning_rate=lambda: lr)  # dynamic LR trick
best_loss = np.inf
for epoch in range(max_epochs + 1):
    log(f"EPOCH {epoch}...", logfile)
    epoch_loss_avg = tfe.metrics.Mean()
    epoch_accuracy = tfe.metrics.Accuracy()

    # TRAIN
    if epoch == 0:
        n = pretrain(model=model, dataloader=pretrain_data)
        log(f"PRETRAINED {n} LAYERS", logfile)
        # model compilation
        model.call = tfe.defun(model.call,
                               input_signature=model.input_signature)
    else:
        # bugfix: tensorflow's shuffle() seems broken...
        epoch_train_files = rng.choice(train_files, epoch_size * batch_size,
                                       replace=True)
        train_data = tf.data.Dataset.from_tensor_slices(epoch_train_files)
        train_data = train_data.batch(batch_size)
        train_data = train_data.map(load_batch_tf)
        train_data = train_data.prefetch(1)
        train_loss, train_kacc = process(model, train_data, top_k, optimizer)
        # Closing of the log call assumed; the source snippet is truncated
        # mid-expression, and the `log(..., logfile)` pattern above suggests it.
        log(f"TRAIN LOSS: {train_loss:0.3f} " + "".join([
            f" acc@{k}: {acc:0.3f}"
            for k, acc in zip(top_k, train_kacc)
        ]), logfile)

policy['name'] = policy_name
policy['type'] = policy_type

if policy['type'] == 'gcnn':
    # load model
    sys.path.insert(0, os.path.abspath(f"models/{policy['name']}"))
    import model
    importlib.reload(model)
    del sys.path[0]
    policy['model'] = model.GCNPolicy()
    policy['model'].restore_state(
        f"trained_models/{args.problem}/{policy['name']}/{seed}/best_params.pkl")
    policy['model'].call = tfe.defun(
        policy['model'].call,
        input_signature=policy['model'].input_signature)
    policy['batch_datatypes'] = [
        tf.float32, tf.int32, tf.float32, tf.float32, tf.int32,
        tf.int32, tf.int32, tf.int32, tf.int32, tf.float32
    ]
    policy['batch_fun'] = load_batch_gcnn
else:
    # load feature normalization parameters
    try:
        with open(
                f"trained_models/{args.problem}/{policy['name']}/{seed}/normalization.pkl",
                'rb') as f:
            policy['feat_shift'], policy['feat_scale'] = pickle.load(f)
    except:

neural_ode = NeuralODE(model, t=t_in)

def compute_gradients_and_update(batch_y0, batch_yN):
    """Takes start positions (x0, y0) and final positions (xN, yN)."""
    pred_y = neural_ode.forward(batch_y0)
    with tf.GradientTape() as g:
        g.watch(pred_y)
        loss = tf.reduce_sum((pred_y - batch_yN)**2)
    dLoss = g.gradient(loss, pred_y)
    h_start, dfdh0, dWeights = neural_ode.backward(pred_y, dLoss)
    return loss, dWeights

# Compile EAGER graph to static (this will be much faster)
compute_gradients_and_update = tfe.defun(compute_gradients_and_update)

# function to compute the kinetic energy
def kinetic_energy(V, loggamma_v, loglambda_v):
    q = (np.sum(-V**2) - loggamma_v**2 - loglambda_v**2) / 2.0
    return q

def compute_gradient_param(dWeights, loggamma, loglambda, batch_size,
                           para_num):
    WW = model.trainable_weights[0].numpy()
    dWeights = (np.exp(loggamma) / 2.0 * dWeights
                + np.exp(loglambda) * np.sign(WW))
    return dWeights

def compute_gradient_hyper(loss, weights, loggamma, loglambda, batch_size,
                           para_num):