def test_normal_integral_mean_and_var_correctly_estimated(self):
    """Importance sampling on Halton points matches the mean and stddev of p."""
    # This test is almost identical to the similarly named test in
    # monte_carlo_test.py. The only difference is that Halton samples are
    # used instead of random samples to evaluate the expectations.
    # MC with pseudo random numbers converges at the rate of 1/sqrt(N)
    # (N = number of samples). For QMC in low dimensions the expected
    # convergence rate is ~1/N, hence 1e3 samples should be enough compared
    # to the 1e6 samples used in the pseudo-random Monte Carlo test.
    num_points = 1000
    with self.test_session():
        p_loc = array_ops.constant([-1.0, 1.0], dtype=dtypes.float64)
        q_loc = array_ops.constant([0.0, 0.0], dtype=dtypes.float64)
        p_scale = array_ops.constant([0.5, 0.5], dtype=dtypes.float64)
        q_scale = array_ops.constant([1.0, 1.0], dtype=dtypes.float64)
        p = normal_lib.Normal(loc=p_loc, scale=p_scale)
        q = normal_lib.Normal(loc=q_loc, scale=q_scale)

        # Push the low-discrepancy points through q's quantile function.
        uniform_points = halton.sample(
            2, num_samples=num_points, dtype=dtypes.float64)
        q_points = q.quantile(uniform_points)

        # Arguments shared by both expectation estimates.
        sampler_kwargs = dict(
            log_p=p.log_prob, sampling_dist_q=q, z=q_points, seed=42)

        # Estimate E_p[X].
        first_moment = mc.expectation_importance_sampler(
            f=lambda x: x, **sampler_kwargs)

        # Estimate E_p[X^2].
        second_moment = mc.expectation_importance_sampler(
            f=math_ops.square, **sampler_kwargs)

        estimated_stddev = math_ops.sqrt(
            second_moment - math_ops.square(first_moment))

        # Tolerances are the same as in monte_carlo_test.py.
        self.assertEqual(p.batch_shape, first_moment.get_shape())
        self.assertAllClose(p.mean().eval(), first_moment.eval(), rtol=0.01)
        self.assertAllClose(
            p.stddev().eval(), estimated_stddev.eval(), rtol=0.02)
def testConsistent(self):
    """quotient * divisor + remainder must give back the numerator."""
    nums, divs = self.intTestData()
    with self.test_session():
        floor_quotient = math_ops.floor_div(nums, divs)
        floor_remainder = math_ops.floormod(nums, divs)
        floor_rebuilt = (floor_quotient * divs + floor_remainder).eval()

        tf_nums = array_ops.constant(nums)
        tf_divs = array_ops.constant(divs)
        operator_rebuilt = (
            tf_nums // tf_divs * tf_divs + tf_nums % tf_divs).eval()

        numpy_rebuilt = (nums // divs) * divs + (nums % divs)
        # Consistency with numpy.
        self.assertAllEqual(floor_rebuilt, numpy_rebuilt)
        # Consistency between the two spellings of floor division.
        self.assertAllEqual(floor_rebuilt, operator_rebuilt)

        # Consistency for the truncating form.
        trunc_quotient = math_ops.truncatediv(nums, divs)
        trunc_remainder = math_ops.truncatemod(nums, divs)
        trunc_rebuilt = (trunc_quotient * divs + trunc_remainder).eval()

        target_shape = (nums.shape[0], divs.shape[1])
        expanded_nums = np.reshape(np.tile(nums, divs.shape[1]), target_shape)
        # Both forms must reproduce the (broadcast) numerator.
        self.assertAllEqual(trunc_rebuilt, expanded_nums)
        self.assertAllEqual(floor_rebuilt, expanded_nums)
def testScaleAndBiasAndIdentity(self):
    """This tests a scaled add which has 3 inputs and 2 outputs."""
    a = array_ops.constant(1.)
    x = array_ops.constant([2., 3.])
    b = array_ops.constant([4., 5.])

    def _scaled_and_bias_and_identity(a, x, b):
        hint = op_hint.OpHint("scale_and_bias_and_identity")
        a, x, b = hint.add_inputs(a, x, b)
        return hint.add_outputs(a * x + b, x)

    hinted_outputs = _scaled_and_bias_and_identity(a, x, b)
    output = array_ops.identity(hinted_outputs, name="ModelOutput")

    with self.cached_session() as sess:
        # One identity per hinted input (3) and output (2) => 3 + 2 = 5,
        # plus 1 for the final output.
        identity_count = self._countIdentities(sess.graph_def.node)
        self.assertEqual(identity_count, 6)

        stubbed_graphdef = op_hint.convert_op_hints_to_stubs(
            graph_def=sess.graph_def)

        op_types = self._getGraphOpTypes(
            stubbed_graphdef,
            output_nodes=[op_hint._tensor_name_base(output.name)])
        self.assertEqual(
            op_types,
            {"scale_and_bias_and_identity", "Const", "Identity", "Pack"})
def report_uninitialized_variables(var_list=None,
                                   name="report_uninitialized_variables"):
    """Adds ops to list the names of uninitialized variables.

    When run, it returns a 1-D tensor containing the names of uninitialized
    variables if there are any, or an empty array if there are none.

    Args:
      var_list: List of `Variable` objects to check. Defaults to the value of
        `all_variables() + local_variables()`
      name: Optional name of the `Operation`.

    Returns:
      A 1-D tensor containing names of the uninitialized variables, or an
      empty 1-D tensor if there are no variables or no uninitialized
      variables.
    """
    if var_list is None:
        var_list = all_variables() + local_variables()
        if not var_list:
            # Backwards compatibility for old-style variables: collect the
            # outputs of raw variable ops. TODO(touts): remove.
            var_list = [
                op.outputs[0]
                for op in ops.get_default_graph().get_operations()
                if op.type in ["Variable", "AutoReloadVariable"]
            ]

    if var_list:
        # 1-D boolean tensor that is True where a variable is NOT initialized.
        init_flags = [state_ops.is_variable_initialized(v) for v in var_list]
        variables_mask = math_ops.logical_not(array_ops.pack(init_flags))
        # 1-D string tensor holding every variable name.
        variable_names_tensor = array_ops.constant(
            [s.op.name for s in var_list])
        # Keep only the names of the uninitialized variables.
        return array_ops.boolean_mask(
            variable_names_tensor, variables_mask, name=name)

    # Return an empty tensor so callers only need to check for a returned
    # tensor of size 0 as an indication of model ready.
    return array_ops.constant([], dtype=dtypes.string, name=name)
def testMultiplyInverseAgainstExplicit(self):
    """`FullFB.multiply_inverse` agrees with an explicit damped inverse."""
    with ops.Graph().as_default(), self.test_session() as sess:
        random_seed.set_random_seed(200)
        params = (array_ops.constant([1., 2.]), array_ops.constant(3.))
        block = fb.FullFB(lc.LayerCollection(), params)
        block.register_additional_minibatch(32)
        grads = (array_ops.constant([2., 3.]), array_ops.constant(4.))
        damping = 0.5
        block.instantiate_factors((grads,), damping)
        block._factor.instantiate_cov_variables()
        block.register_inverse()
        block._factor.instantiate_inv_variables()

        # Make sure our inverse is something other than the identity.
        set_cov = state_ops.assign(block._factor._cov, _make_psd(3))
        sess.run(set_cov)
        sess.run(block._factor.make_inverse_update_ops())

        rhs_flat = np.array([4., 5., 6.], dtype=np.float32)
        rhs = utils.column_to_tensors(params, array_ops.constant(rhs_flat))
        product = block.multiply_inverse(rhs)
        product_flat = sess.run(utils.tensors_to_column(product)).ravel()

        fisher = sess.run(block.full_fisher_block())
        damped_inverse = np.linalg.inv(fisher + damping * np.eye(3))
        expected = np.dot(damped_inverse, rhs_flat)

        self.assertAllClose(product_flat, expected)
def testFullFBInitTensorTuple(self):
    """A `FullFB` built from a tuple of params reports those params."""
    with ops.Graph().as_default():
        random_seed.set_random_seed(200)
        vector_param = array_ops.constant([1., 2.])
        scalar_param = array_ops.constant(3.)
        params = (vector_param, scalar_param)
        block = fb.FullFB(lc.LayerCollection(), params, 32)

        self.assertAllEqual(params, block.tensors_to_compute_grads())
def testScopeStringFromParamsMultipleTypes(self):
    """Lists, strings, bools, ints and tensor tuples all end up in the scope."""
    with tf_ops.Graph().as_default():
        # Creation order matters: the expected string embeds the op names.
        first = array_ops.constant(1)
        second = array_ops.constant(2)
        mixed_params = [[1, 2, 3], 'foo', True, 4, (first, second)]
        scope_string = ff.scope_string_from_params(mixed_params)
        self.assertEqual('1-2-3_foo_True_4_Const__Const_1', scope_string)
def testOptimizerInit(self):
    """Constructing a `KfacOptimizer` over a registered layer succeeds."""
    with ops.Graph().as_default():
        layer_collection = lc.LayerCollection()

        inputs = array_ops.ones((2, 1)) * 2
        weights_val = np.ones((1, 1), dtype=np.float32) * 3.
        weights = variable_scope.get_variable(
            'w', initializer=array_ops.constant(weights_val))
        bias = variable_scope.get_variable(
            'b', initializer=init_ops.zeros_initializer(), shape=(1, 1))
        pre_activation = math_ops.matmul(inputs, weights) + bias

        layer_collection.register_fully_connected(
            (weights, bias), inputs, pre_activation)

        logits = math_ops.tanh(pre_activation)
        targets = array_ops.constant([[0.], [1.]])
        # The loss is only built, never evaluated by this test.
        loss = math_ops.reduce_mean(
            nn.softmax_cross_entropy_with_logits(
                logits=logits, labels=targets))

        layer_collection.register_categorical_predictive_distribution(logits)

        optimizer.KfacOptimizer(
            0.1,
            0.2,
            0.3,
            layer_collection,
            momentum=0.5,
            momentum_type='regular')
def test_mixture_dev(self):
    """`mixture_stddev` matches known standard deviations for two mixtures."""
    mixture_weights = np.array([[1.0/3, 1.0/3, 1.0/3],
                                [0.750, 0.250, 0.000]])
    component_means = np.array([[1.0, 1.0, 1.0],
                                [-5, 0, 1.25]])
    component_devs = np.array([[1.0, 1.0, 1.0],
                               [0.01, 2.0, 0.1]])

    # The first case should trivially have a standard deviation of 1.0
    # because all components are identical and have that standard deviation.
    # The second case was computed by hand.
    expected_devs = np.array([1.0, 2.3848637277])

    mix_dev = distribution_util.mixture_stddev(
        array_ops.constant(mixture_weights),
        array_ops.constant(component_means),
        array_ops.constant(component_devs))

    with self.test_session() as sess:
        actual_devs = sess.run(mix_dev)

    self.assertAllClose(actual_devs, expected_devs)
def testMakeSparseSplitAllEmptyDimensions(self):
    """Tests split handler op when all dimensions have only bias bucket id.

    With nothing but the bias bucket present, the op is expected to emit no
    split candidates, so all three outputs (partitions, gains, splits) must
    be empty.
    """
    with self.test_session() as sess:
        # The data looks like the following after dividing by number of
        # steps (2).
        # Gradients    | Partition | Dimension | bucket ID       |
        # (0.9, 0.39)  | 0         |    0      |  -1             |
        # (4.0, 0.13)  | 1         |    0      |  -1             |
        partition_ids = array_ops.constant([0, 1], dtype=dtypes.int32)
        # We have only 1 dimension in our sparse feature column.
        bucket_ids = array_ops.constant([[-1, 0], [-1, 0]], dtype=dtypes.int64)
        gradients = array_ops.constant([1.8, 8.0])
        hessians = array_ops.constant([0.78, 0.26])
        bucket_boundaries = array_ops.constant([0.3, 0.52])
        partitions, gains, splits = (
            split_handler_ops.build_sparse_inequality_splits(
                num_minibatches=2,
                partition_ids=partition_ids,
                bucket_ids=bucket_ids,
                gradients=gradients,
                hessians=hessians,
                bucket_boundaries=bucket_boundaries,
                l1_regularization=0,
                l2_regularization=2,
                tree_complexity_regularization=0,
                min_node_weight=0,
                feature_column_group_id=0,
                bias_feature_id=-1,
                class_id=-1,
                multiclass_strategy=learner_pb2.LearnerConfig.TREE_PER_CLASS))
        partitions, gains, splits = sess.run([partitions, gains, splits])
    self.assertEqual(0, len(partitions))
    # `gains` was fetched but previously never checked; it is expected to be
    # empty alongside the other two outputs, as in the dense
    # empty-inputs test.
    self.assertEqual(0, len(gains))
    self.assertEqual(0, len(splits))
def testSwishLiteHint(self):
    """Makes a custom op swish and makes sure it gets converted as a unit."""
    image = array_ops.constant([1., 2., 3., 4.])
    swish_scale = array_ops.constant(1.0)

    def _swish(input_tensor, scale):
        hint = op_hint.OpHint("cool_activation")
        input_tensor, scale = hint.add_inputs(input_tensor, scale)
        activation = math_ops.sigmoid(input_tensor) * input_tensor * scale
        activation, = hint.add_outputs(activation)
        return activation

    output = array_ops.identity(_swish(image, swish_scale), name="ModelOutput")

    with self.cached_session() as sess:
        # Identities expected in the graph: 2 inputs, 1 output and 1 final
        # output.
        identity_count = self._countIdentities(sess.graph_def.node)
        self.assertEqual(identity_count, 4)

        stubbed_graphdef = op_hint.convert_op_hints_to_stubs(
            graph_def=sess.graph_def)

        op_types = self._getGraphOpTypes(
            stubbed_graphdef,
            output_nodes=[op_hint._tensor_name_base(output.name)])
        self.assertEqual(op_types, {"cool_activation", "Const", "Identity"})
def testTrackPersistentBytes(self):
    """Byte counters of the 'mul' node are positive and stable across runs."""
    ops.reset_default_graph()
    a = array_ops.constant(np.ones((100, 100)))
    b = array_ops.constant(np.ones((100, 100)))
    c = a * b

    with session.Session() as sess:
        run_options = config_pb2.RunOptions(
            trace_level=config_pb2.RunOptions.FULL_TRACE)

        # First traced run.
        first_metadata = config_pb2.RunMetadata()
        sess.run(c, options=run_options, run_metadata=first_metadata)

        options = option_builder.ProfileOptionBuilder.time_and_memory()
        options['min_bytes'] = 0
        options['select'] = ('bytes', 'peak_bytes', 'output_bytes',
                             'residual_bytes')
        first_profile = model_analyzer.profile(
            sess.graph, run_meta=first_metadata, cmd='scope', options=options)

        # Second traced run with fresh metadata.
        second_metadata = config_pb2.RunMetadata()
        sess.run(c, options=run_options, run_metadata=second_metadata)
        second_profile = model_analyzer.profile(
            sess.graph, run_meta=second_metadata, cmd='scope', options=options)

        first_mul = lib.SearchTFProfNode(first_profile, 'mul')
        second_mul = lib.SearchTFProfNode(second_profile, 'mul')

        self.assertGreater(first_mul.peak_bytes, 0)
        self.assertGreater(first_mul.output_bytes, 0)
        self.assertGreater(first_mul.residual_bytes, 0)
        self.assertEqual(first_mul.peak_bytes, second_mul.peak_bytes)
        self.assertEqual(first_mul.output_bytes, second_mul.output_bytes)
        self.assertEqual(first_mul.residual_bytes, second_mul.residual_bytes)
def report_uninitialized_resources(resource_list=None,
                                   name="report_uninitialized_resources"):
    """Returns the names of all uninitialized resources in resource_list.

    If the returned tensor is empty then all resources have been initialized.

    Args:
      resource_list: resources to check. If None, will use
        shared_resources() + local_resources().
      name: name for the resource-checking op.

    Returns:
      Tensor containing names of the handles of all resources which have not
      yet been initialized.
    """
    if resource_list is None:
        resource_list = shared_resources() + local_resources()

    # Run all operations on CPU.
    with ops.name_scope(name), ops.device("/cpu:0"):
        if resource_list:
            # 1-D boolean tensor, True where a resource is NOT initialized.
            init_flags = [r.is_initialized for r in resource_list]
            variables_mask = math_ops.logical_not(array_ops.stack(init_flags))
            # 1-D string tensor with the handle name of every resource.
            handle_names = [s.handle.name for s in resource_list]
            variable_names_tensor = array_ops.constant(handle_names)
            # Keep only the names of the uninitialized resources.
            return array_ops.boolean_mask(
                variable_names_tensor, variables_mask)

        # Return an empty tensor so callers only need to check for a returned
        # tensor of size 0 as an indication of model ready.
        return array_ops.constant([], dtype=dtypes.string)
def testRegisterSingleParamRegisteredInTuple(self):
    """Registering `x` when `(x, y)` already has a block keeps that block."""
    x = variable_scope.get_variable('x', initializer=array_ops.constant(1))
    y = variable_scope.get_variable('y', initializer=array_ops.constant(1))
    collection = layer_collection.LayerCollection()
    collection.fisher_blocks = {(x, y): '1'}
    collection.register_block(x, 'foo')
    self.assertEqual({'1'}, set(collection.get_blocks()))
def test_kernel_classifier_distance_block_sizes(self):
    """Checks `kernel_classifier_distance` with unusual max_block_size values."""
    np.random.seed(0)

    real_pool = np.float32(np.random.randn(512, 256))
    generated_pool = np.float32(np.random.randn(768, 256) * 1.1 + .05)

    # The block size is fed at run time so one op serves every case below.
    max_block_size = array_ops.placeholder(dtypes.int32, shape=())
    kid_op = _run_with_mock(
        classifier_metrics.kernel_classifier_distance_and_std_from_activations,
        array_ops.constant(real_pool),
        array_ops.constant(generated_pool),
        max_block_size=max_block_size)

    for block_size in [50, 512, 1000]:
        with self.cached_session() as sess:
            feed = {max_block_size: block_size}
            actual_kid, actual_std = sess.run(kid_op, feed)

        expected_kid, expected_std = _expected_kid_and_std(
            real_pool, generated_pool, max_block_size=block_size)

        self.assertAllClose(expected_kid, actual_kid, 0.001)
        self.assertAllClose(expected_std, actual_std, 0.001)
def testAggregateGradients(self):
    """Gradient from `gradients_function` matches the graph-mode gradient."""

    def fn(x):
        rows_a = constant_op.constant(np.array([0, 1]))
        rows_b = constant_op.constant(np.array([2, 3]))
        rows_c = constant_op.constant(np.array([1, 3]))
        # A mixture of IndexedSlices and dense tensor to aggregate.
        part_a = embedding_ops.embedding_lookup(x, rows_a)
        part_b = embedding_ops.embedding_lookup(x, rows_b)
        part_c = embedding_ops.embedding_lookup(x, rows_c)
        total = math_ops.reduce_sum(x * constant_op.constant(2.0))
        return part_a * part_b * part_c * total

    var_np = np.random.rand(4, 2).astype(np.float32)
    var = constant_op.constant(var_np)
    grad = backprop.gradients_function(fn, [0])(var)[0]
    grad = self.evaluate(ops.convert_to_tensor(grad))

    # The reference computation below is only built outside eager mode.
    if not context.executing_eagerly():
        tf_var = array_ops.constant(var_np, dtypes.float32)
        tf_rows_a = array_ops.constant([0, 1])
        tf_rows_b = array_ops.constant([2, 3])
        tf_rows_c = array_ops.constant([1, 3])
        tf_part_a = embedding_ops.embedding_lookup(tf_var, tf_rows_a)
        tf_part_b = embedding_ops.embedding_lookup(tf_var, tf_rows_b)
        tf_part_c = embedding_ops.embedding_lookup(tf_var, tf_rows_c)
        tf_total = math_ops.reduce_sum(tf_var * 2.0, axis=(0, 1))
        tf_y = tf_part_a * tf_part_b * tf_part_c * tf_total
        tf_grad = gradients.gradients(tf_y, [tf_var])[0]

        # Densify the IndexedSlices gradient before comparing.
        tf_dense_grad = math_ops.unsorted_segment_sum(
            tf_grad.values, tf_grad.indices, tf_grad.dense_shape[0])

        self.assertAllClose(grad, self.evaluate(tf_dense_grad))
def testMultiplyInverseTuple(self):
    """`multiply_inverse` on a tuple for `FullyConnectedKFACBasicFB`."""
    with ops.Graph().as_default(), self.test_session() as sess:
        random_seed.set_random_seed(200)
        inputs = array_ops.constant([[1., 2., 3.], [3., 4., 5.], [5., 6., 7.]])
        outputs = array_ops.constant([[3., 4.], [5., 6.]])
        block = fb.FullyConnectedKFACBasicFB(
            lc.LayerCollection(), has_bias=False)
        block.register_additional_minibatch(inputs, outputs)
        grads = outputs**2
        block.instantiate_factors(([grads],), 0.5)

        # Make sure our inverse is something other than the identity.
        sess.run(tf_variables.global_variables_initializer())
        sess.run(block._input_factor.make_inverse_update_ops())
        sess.run(block._output_factor.make_inverse_update_ops())

        matrix_part = np.arange(2, 6).reshape(2, 2).astype(np.float32)
        column_part = np.arange(1, 3).reshape(2, 1).astype(np.float32)
        vector = (matrix_part, column_part)

        product = block.multiply_inverse(
            (array_ops.constant(vector[0]), array_ops.constant(vector[1])))
        product = sess.run(product)

        self.assertAllClose(
            [[0.686291, 1.029437], [1.372583, 1.715729]], product[0])
        self.assertAllClose([0.343146, 0.686291], product[1])
def testMultiplyInverseTuple(self):
    """`multiply_inverse` on a tuple for `ConvKFCBasicFB`."""
    with ops.Graph().as_default(), self.test_session() as sess:
        random_seed.set_random_seed(200)
        # Keep this creation order: the random ops are seeded from the
        # graph-level seed set above.
        params = random_ops.random_normal((2, 2, 2, 2))
        inputs = random_ops.random_normal((2, 2, 2, 2))
        outputs = random_ops.random_normal((2, 2, 2, 2))
        block = fb.ConvKFCBasicFB(
            lc.LayerCollection(), params, (1, 1, 1, 1), 'SAME')
        block.register_additional_minibatch(inputs, outputs)
        grads = outputs**2
        block.instantiate_factors(([grads],), 0.5)

        # Make sure our inverse is something other than the identity.
        sess.run(tf_variables.global_variables_initializer())
        sess.run(block._input_factor.make_inverse_update_ops())
        sess.run(block._output_factor.make_inverse_update_ops())

        matrix_part = np.arange(1, 15).reshape(7, 2).astype(np.float32)
        column_part = np.arange(2, 4).reshape(2, 1).astype(np.float32)
        vector = (matrix_part, column_part)

        product = block.multiply_inverse(
            (array_ops.constant(vector[0]), array_ops.constant(vector[1])))
        product = sess.run(product)

        self.assertAllClose([0.136455, 0.27291], product[0][0])
        self.assertAllClose([0.27291, 0.409365], product[1])
def testAggregateGradients(self):
    """Gradient from `gradients_function` matches the graph-mode gradient."""

    def fn(x):
        rows_a = tensor.Tensor(np.array([0, 1]))
        rows_b = tensor.Tensor(np.array([2, 3]))
        rows_c = tensor.Tensor(np.array([1, 3]))
        # A mixture of IndexedSlices and dense tensor to aggregate.
        part_a = embedding_ops.embedding_lookup(x, rows_a)
        part_b = embedding_ops.embedding_lookup(x, rows_b)
        part_c = embedding_ops.embedding_lookup(x, rows_c)
        total = math_ops.reduce_sum(x * tensor.Tensor(2.0))
        return part_a * part_b * part_c * total

    var_np = np.random.rand(4, 2).astype(np.float32)
    var = tensor.Tensor(var_np)
    grad = backprop.gradients_function(fn, [0])(var)[0]

    # Build the same computation in graph mode as the reference.
    with context.graph_mode(), self.test_session():
        tf_var = array_ops.constant(var_np, dtypes.float32)
        tf_rows_a = array_ops.constant([0, 1])
        tf_rows_b = array_ops.constant([2, 3])
        tf_rows_c = array_ops.constant([1, 3])
        tf_part_a = embedding_ops.embedding_lookup(tf_var, tf_rows_a)
        tf_part_b = embedding_ops.embedding_lookup(tf_var, tf_rows_b)
        tf_part_c = embedding_ops.embedding_lookup(tf_var, tf_rows_c)
        tf_total = math_ops.reduce_sum(tf_var * 2.0, reduction_indices=(0, 1))
        tf_y = tf_part_a * tf_part_b * tf_part_c * tf_total
        tf_grad = gradients.gradients(tf_y, [tf_var])[0]

        # Densify the IndexedSlices gradient before comparing.
        tf_dense_grad = math_ops.unsorted_segment_sum(
            tf_grad.values, tf_grad.indices, tf_grad.dense_shape[0])

        self.assertAllClose(grad.numpy(), tf_dense_grad.eval())
def testColumnToTensors(self):
    """`column_to_tensors` reshapes a column vector to match each template."""
    with ops.Graph().as_default(), self.test_session() as sess:
        random_seed.set_random_seed(200)

        square = np.array([[0., 1.], [2., 3.]])
        bias_values = np.array([4., 5.])

        # Case 1: a single tensor template.
        vector_template = array_ops.constant(np.array([[0., 1.], [2., 3.]]))
        colvec = array_ops.constant(np.arange(4.)[:, None])
        output = sess.run(utils.column_to_tensors(vector_template, colvec))
        self.assertAllClose(output, square)

        # Case 2: the fully connected layer params as template.
        vector_template = self._fully_connected_layer_params()
        colvec = array_ops.constant(np.arange(6.)[:, None])
        output = sess.run(utils.column_to_tensors(vector_template, colvec))

        self.assertIsInstance(output, tuple)
        self.assertEqual(len(output), 2)
        first, second = output
        self.assertAllClose(first, square)
        self.assertAllClose(second, bias_values)

        # Case 3: the same template as a list with one more tensor appended.
        vector_template = list(vector_template)
        vector_template.append(array_ops.constant([[6.], [7.], [8.], [9.]]))
        colvec = array_ops.constant(np.arange(10.)[:, None])
        output = sess.run(utils.column_to_tensors(vector_template, colvec))
        self.assertIsInstance(output, tuple)
        self.assertEqual(len(output), 3)
        first, second, third = output
        self.assertAllClose(first, square)
        self.assertAllClose(second, bias_values)
        self.assertAllClose(third, np.array([[6.], [7.], [8.], [9.]]))
def test_parameter_switching(self):
    """The getter substitutes overridden parameters and evaluates the rest."""
    parameter = array_ops.constant(5)
    overridden_parameter = array_ops.constant(3)
    overrides = {overridden_parameter: 4}
    with self.cached_session():
        getter = model_utils.parameter_switch(overrides)
        untouched_value = getter(parameter)
        replaced_value = getter(overridden_parameter)
        self.assertEqual(5, untouched_value)
        self.assertEqual(4, replaced_value)
def testUpdateClipCoeff(self):
    """The clip coefficient is 1 inside the constraint and rescales outside."""
    with ops.Graph().as_default(), self.test_session() as sess:
        grads_and_vars = [
            (array_ops.constant([[1., 2.], [3., 4.]]), None),
            (array_ops.constant([[2., 3.], [4., 5.]]), None),
        ]
        pgrads_and_vars = [
            (array_ops.constant([[3., 4.], [5., 6.]]), None),
            (array_ops.constant([[7., 8.], [9., 10.]]), None),
        ]
        lrate = 0.1

        # Note: without rescaling, the squared Fisher norm of the update
        # is 1.74

        # If the update already satisfies the norm constraint, there should
        # be no rescaling.
        loose_opt = optimizer.KfacOptimizer(
            lrate, 0.2, 0.3, dummy_layer_collection(), norm_constraint=10.)
        loose_coeff = loose_opt._update_clip_coeff(
            grads_and_vars, pgrads_and_vars)
        self.assertAlmostEqual(1., sess.run(loose_coeff), places=5)

        # If the update violates the constraint, it should be rescaled to
        # be on the constraint boundary.
        tight_opt = optimizer.KfacOptimizer(
            lrate, 0.2, 0.3, dummy_layer_collection(), norm_constraint=0.5)
        tight_coeff = tight_opt._update_clip_coeff(
            grads_and_vars, pgrads_and_vars)
        sq_norm_pgrad = tight_opt._squared_fisher_norm(
            grads_and_vars, pgrads_and_vars)
        sq_norm_update = lrate**2 * tight_coeff**2 * sq_norm_pgrad
        self.assertAlmostEqual(0.5, sess.run(sq_norm_update), places=5)
def testMakeDenseSplitEmptyInputs(self):
    """Tests empty inputs op."""
    with self.test_session() as sess:
        empty_partition_ids = array_ops.constant([], dtype=dtypes.int32)
        empty_bucket_ids = array_ops.constant([[]], dtype=dtypes.int64)
        empty_gradients = array_ops.constant([])
        empty_hessians = array_ops.constant([])
        boundaries = [0.3, 0.52]
        split_ops = split_handler_ops.build_dense_inequality_splits(
            num_minibatches=0,
            partition_ids=empty_partition_ids,
            bucket_ids=empty_bucket_ids,
            gradients=empty_gradients,
            hessians=empty_hessians,
            bucket_boundaries=boundaries,
            l1_regularization=0.1,
            l2_regularization=1,
            tree_complexity_regularization=0,
            min_node_weight=0,
            class_id=-1,
            feature_column_group_id=0,
            multiclass_strategy=learner_pb2.LearnerConfig.TREE_PER_CLASS)
        partitions, gains, splits = split_ops
        partitions, gains, splits = sess.run([partitions, gains, splits])
    # .assertEmpty doesn't exist on ubuntu-contrib
    for fetched in (partitions, gains, splits):
        self.assertEqual(0, len(fetched))
def testAggregate(self):
    """Stack-aggregated hint inputs/outputs collapse into a single 'agg' op."""
    a = array_ops.constant([3., 4.])
    b = array_ops.constant([5., 6.])
    hint = op_hint.OpHint("agg")
    stack_mode = op_hint.OpHint.AGGREGATE_STACK

    a0, a1 = array_ops.unstack(a)
    b0, b1 = array_ops.unstack(b)

    # Register the inputs in the order a0, b0, a1, b1, grouped by tag.
    a0 = hint.add_input(a0, tag="c", aggregate=stack_mode)
    b0 = hint.add_input(b0, tag="n", aggregate=stack_mode)
    a1 = hint.add_input(a1, tag="c", aggregate=stack_mode)
    b1 = hint.add_input(b1, tag="n", aggregate=stack_mode)

    left_sum = math_ops.add(a0, b0, name="addleft")
    right_sum = math_ops.add(a1, b1, name="addright")
    left_sum = hint.add_output(left_sum, tag="out", aggregate=stack_mode)
    right_sum = hint.add_output(right_sum, tag="out", aggregate=stack_mode)

    stacked = array_ops.stack([left_sum, right_sum])
    output = array_ops.identity(stacked, name="FINAL_OUTPUT")

    with self.cached_session() as sess:
        stubbed_graphdef = op_hint.convert_op_hints_to_stubs(
            graph_def=sess.graph_def)
        op_types = self._getGraphOpTypes(
            stubbed_graphdef,
            output_nodes=[op_hint._tensor_name_base(output.name)])
        self.assertEqual(op_types, {"agg", "Const", "Identity"})
def testRegisterSingleParamRegisteredInTuple(self):
    """Registering `x` when `(x, y)` already has a block raises ValueError."""
    x = variable_scope.get_variable('x', initializer=array_ops.constant(1))
    y = variable_scope.get_variable('y', initializer=array_ops.constant(1))
    collection = layer_collection.LayerCollection()
    collection.fisher_blocks = {(x, y): '1'}
    with self.assertRaises(ValueError) as raised:
        collection.register_block(x, 'foo')
    # Checked after the `with` block so the assertion actually runs.
    self.assertIn('was already registered', str(raised.exception))
def testFullFBInitSingleTensor(self):
    """A `FullFB` with a registered minibatch reports its params for grads."""
    with ops.Graph().as_default():
        random_seed.set_random_seed(200)
        vector_param = array_ops.constant([1., 2.])
        scalar_param = array_ops.constant(3.)
        params = (vector_param, scalar_param)
        block = fb.FullFB(lc.LayerCollection(), params)
        block.register_additional_minibatch(32)

        self.assertAllEqual(params, block.tensors_to_compute_grads())
def testFullyConnectedSeriesFBInit(self):
    """`FullyConnectedSeriesFB` computes grads w.r.t. its outputs."""
    with ops.Graph().as_default():
        random_seed.set_random_seed(200)
        inputs = array_ops.constant([1., 2.])
        outputs = array_ops.constant([3., 4.])
        collection = lc.LayerCollection()
        block = fb.FullyConnectedSeriesFB(
            collection, inputs=[inputs], outputs=[outputs])
        grad_targets = block.tensors_to_compute_grads()
        self.assertAllEqual([outputs], grad_targets)
def testRegisterTupleParamRegistered(self):
    """Registering a tuple that already has a block raises ValueError."""
    x = variable_scope.get_variable('x', initializer=array_ops.constant(1))
    y = variable_scope.get_variable('y', initializer=array_ops.constant(1))
    collection = layer_collection.LayerCollection()
    collection.fisher_blocks = {(x, y): '1'}

    with self.assertRaises(ValueError):
        collection.register_block((x, y), 'foo')
def testRegisterSingleParamNotRegistered(self):
    """Registering a param with no existing block must not raise."""
    x = variable_scope.get_variable('x', initializer=array_ops.constant(1))
    collection = layer_collection.LayerCollection()
    # Only an unrelated variable has a block so far.
    y = variable_scope.get_variable('y', initializer=array_ops.constant(1))
    collection.fisher_blocks = {y: '1'}
    collection.register_block(x, 'foo')
def testRepeatedAdds(self):
    """Every tensor feeding the output is a member of the sub-graph."""
    a = array_ops.constant([[1., 2.], [3., 4.]])
    b = array_ops.constant([[5., 6.], [7., 8.]])
    c = a + b + a  # note that a appears twice in this graph
    sub_graph = utils.SubGraph((c,))
    for tensor_in_graph in (a, b, c):
        self.assertTrue(sub_graph.is_member(tensor_in_graph))
def run(self):
    """Return a dict with a single constant tensor under "my_output"."""
    outputs = {}
    outputs["my_output"] = array_ops.constant(1.0)
    return outputs
def build(self, input_shape):
    """Validates the layer configuration against `input_shape` and adds weights.

    Normalizes `self.axis` to a list of non-negative indices, validates the
    virtual-batch and fused settings, picks the parameter dtype and shape,
    and then creates `gamma`/`beta` (or constant stand-ins when fused), the
    moving mean/variance and, when `self.renorm` is set, the renorm
    variables. Sets `self.built` to True at the end.

    Args:
      input_shape: Anything accepted by `tensor_shape.TensorShape`.

    Raises:
      ValueError: If the input rank is undefined; an axis is out of range or
        duplicated; `virtual_batch_size` is not positive, or is combined
        with axis 0 or with `adjustment`; `fused=True` is requested for a
        non-4D input under V2 behavior; fused is used with an axis other
        than `[1]` or `[3]`; or a normalized axis has an undefined dimension.
    """
    input_shape = tensor_shape.TensorShape(input_shape)
    if not input_shape.ndims:
        raise ValueError('Input has undefined rank:', input_shape)
    ndims = len(input_shape)

    # Convert axis to list and resolve negatives
    if isinstance(self.axis, int):
        self.axis = [self.axis]

    for idx, x in enumerate(self.axis):
        if x < 0:
            self.axis[idx] = ndims + x

    # Validate axes
    for x in self.axis:
        if x < 0 or x >= ndims:
            raise ValueError('Invalid axis: %d' % x)
    if len(self.axis) != len(set(self.axis)):
        raise ValueError('Duplicate axis: %s' % self.axis)

    if self.virtual_batch_size is not None:
        if self.virtual_batch_size <= 0:
            raise ValueError('virtual_batch_size must be a positive integer that '
                             'divides the true batch size of the input Tensor')
        # If using virtual batches, the first dimension must be the batch
        # dimension and cannot be the batch norm axis
        if 0 in self.axis:
            raise ValueError('When using virtual_batch_size, the batch dimension '
                             'must be 0 and thus axis cannot include 0')
        if self.adjustment is not None:
            raise ValueError('When using virtual_batch_size, adjustment cannot '
                             'be specified')

    if self.fused in (None, True):
        # TODO(yaozhang): if input is not 4D, reshape it to 4D and reshape the
        # output back to its original shape accordingly.
        if self._USE_V2_BEHAVIOR:
            if self.fused is None:
                self.fused = (ndims == 4)
            elif self.fused and ndims != 4:
                raise ValueError('Batch normalization layers with fused=True only '
                                 'support 4D input tensors.')
        else:
            assert self.fused is not None
            self.fused = (ndims == 4 and self._fused_can_be_used())
        # TODO(chrisying): fused batch norm is currently not supported for
        # multi-axis batch norm and by extension virtual batches. In some cases,
        # it might be possible to use fused batch norm but would require reshaping
        # the Tensor to 4D with the axis in 1 or 3 (preferred 1) which is
        # particularly tricky. A compromise might be to just support the most
        # common use case (turning 5D w/ virtual batch to NCHW)

    if self.fused:
        # The fused path only supports normalizing over axis 1 or axis 3.
        if self.axis == [1]:
            self._data_format = 'NCHW'
        elif self.axis == [3]:
            self._data_format = 'NHWC'
        else:
            raise ValueError('Unsupported axis, fused batch norm only supports '
                             'axis == [1] or axis == [3]')

    # Raise parameters of fp16 batch norm to fp32
    if self.dtype == dtypes.float16 or self.dtype == dtypes.bfloat16:
        param_dtype = dtypes.float32
    else:
        param_dtype = self.dtype or dtypes.float32

    # Map each normalized axis to its static size; every size must be known.
    axis_to_dim = {x: input_shape.dims[x].value for x in self.axis}
    for x in axis_to_dim:
        if axis_to_dim[x] is None:
            raise ValueError('Input has undefined `axis` dimension. Input shape: ',
                             input_shape)
    self.input_spec = InputSpec(ndim=ndims, axes=axis_to_dim)

    if len(axis_to_dim) == 1 and self.virtual_batch_size is None:
        # Single axis batch norm (most common/default use-case)
        param_shape = (list(axis_to_dim.values())[0],)
    else:
        # Parameter shape is the original shape but with 1 in all non-axis dims
        param_shape = [axis_to_dim[i] if i in axis_to_dim else 1 for i in range(ndims)]
        if self.virtual_batch_size is not None:
            # When using virtual batches, add an extra dim at index 1
            param_shape.insert(1, 1)
            for idx, x in enumerate(self.axis):
                self.axis[idx] = x + 1  # Account for added dimension

    if self.scale:
        self.gamma = self.add_weight(
            name='gamma',
            shape=param_shape,
            dtype=param_dtype,
            initializer=self.gamma_initializer,
            regularizer=self.gamma_regularizer,
            constraint=self.gamma_constraint,
            trainable=True)
    else:
        self.gamma = None
        if self.fused:
            # No trainable scale: keep a constant 1.0 of the parameter shape.
            self._gamma_const = array_ops.constant(
                1.0, dtype=param_dtype, shape=param_shape)

    if self.center:
        self.beta = self.add_weight(
            name='beta',
            shape=param_shape,
            dtype=param_dtype,
            initializer=self.beta_initializer,
            regularizer=self.beta_regularizer,
            constraint=self.beta_constraint,
            trainable=True)
    else:
        self.beta = None
        if self.fused:
            # No trainable offset: keep a constant 0.0 of the parameter shape.
            self._beta_const = array_ops.constant(
                0.0, dtype=param_dtype, shape=param_shape)

    try:
        # Disable variable partitioning when creating the moving mean and variance
        if hasattr(self, '_scope') and self._scope:
            partitioner = self._scope.partitioner
            self._scope.set_partitioner(None)
        else:
            partitioner = None
        self.moving_mean = self.add_weight(
            name='moving_mean',
            shape=param_shape,
            dtype=param_dtype,
            initializer=self.moving_mean_initializer,
            synchronization=tf_variables.VariableSynchronization.ON_READ,
            trainable=False,
            aggregation=tf_variables.VariableAggregation.MEAN)

        self.moving_variance = self.add_weight(
            name='moving_variance',
            shape=param_shape,
            dtype=param_dtype,
            initializer=self.moving_variance_initializer,
            synchronization=tf_variables.VariableSynchronization.ON_READ,
            trainable=False,
            aggregation=tf_variables.VariableAggregation.MEAN)

        if self.renorm:
            # Create variables to maintain the moving mean and standard deviation.
            # These are used in training and thus are different from the moving
            # averages above. The renorm variables are colocated with moving_mean
            # and moving_variance.
            # NOTE: below, the outer `with device` block causes the current device
            # stack to be cleared. The nested ones use a `lambda` to set the desired
            # device and ignore any devices that may be set by the custom getter.
            # NOTE(review): no `with device` block is visible in this function;
            # the note above may be stale -- confirm.
            def _renorm_variable(name, shape):
                # Zero-initialized, non-trainable weight of the given shape.
                var = self.add_weight(
                    name=name,
                    shape=shape,
                    dtype=param_dtype,
                    initializer=init_ops.zeros_initializer(),
                    synchronization=tf_variables.VariableSynchronization.ON_READ,
                    trainable=False,
                    aggregation=tf_variables.VariableAggregation.MEAN)
                return var

            with distribution_strategy_context.get_distribution_strategy(
            ).colocate_vars_with(self.moving_mean):
                self.renorm_mean = _renorm_variable('renorm_mean', param_shape)
                self.renorm_mean_weight = _renorm_variable('renorm_mean_weight', ())
            # We initialize renorm_stddev to 0, and maintain the (0-initialized)
            # renorm_stddev_weight. This allows us to (1) mix the average
            # stddev with the minibatch stddev early in training, and (2) compute
            # the unbiased average stddev by dividing renorm_stddev by the weight.
            with distribution_strategy_context.get_distribution_strategy(
            ).colocate_vars_with(self.moving_variance):
                self.renorm_stddev = _renorm_variable('renorm_stddev', param_shape)
                self.renorm_stddev_weight = _renorm_variable('renorm_stddev_weight', ())
    finally:
        # Restore the partitioner that was disabled above, if there was one.
        if partitioner:
            self._scope.set_partitioner(partitioner)
    self.built = True
def run(self, inp):
    """Return a constant under "output_b" and `inp + inp * inp` under "output_a"."""
    # Here the keys are not ordered lexicographically on purpose.
    outputs = {}
    outputs["output_b"] = array_ops.constant(1.0)
    outputs["output_a"] = inp + inp * inp
    return outputs
def testGenerateFeatureSplitCandidatesMulticlass(self):
  """Checks split candidates built with the FULL_HESSIAN multiclass strategy.

  Feeds one batch of per-class gradients and full 2x2 hessians through an
  EqualitySplitHandler and verifies the partitions, the leaf vector sizes and
  the chosen feature id of the generated splits.
  """
  with self.cached_session() as sess:
    # Batch size is 4, 2 gradients per each instance.
    gradients = array_ops.constant(
        [[0.2, 0.1], [-0.5, 0.2], [1.2, 3.4], [4.0, -3.5]], shape=[4, 2])
    # 2x2 matrix for each instance
    hessian_0 = [[0.12, 0.02], [0.3, 0.11]]
    hessian_1 = [[0.07, -0.2], [-0.5, 0.2]]
    hessian_2 = [[0.2, -0.23], [-0.8, 0.9]]
    hessian_3 = [[0.13, -0.3], [-1.5, 2.2]]
    # Built exactly once (the previous version created this constant twice and
    # assigned `partition_ids` as a list that was immediately overwritten).
    hessians = array_ops.constant(
        [hessian_0, hessian_1, hessian_2, hessian_3])
    partition_ids = array_ops.constant([0, 0, 0, 1], dtype=dtypes.int32)
    indices = [[0, 0], [0, 1], [2, 0], [3, 0]]
    values = array_ops.constant([1, 2, 2, 1], dtype=dtypes.int64)

    gradient_shape = tensor_shape.TensorShape([2])
    hessian_shape = tensor_shape.TensorShape([2, 2])
    class_id = -1

    split_handler = categorical_split_handler.EqualitySplitHandler(
        l1_regularization=0.1,
        l2_regularization=1,
        tree_complexity_regularization=0,
        min_node_weight=0,
        sparse_int_column=sparse_tensor.SparseTensor(
            indices, values, [4, 1]),
        feature_column_group_id=0,
        gradient_shape=gradient_shape,
        hessian_shape=hessian_shape,
        multiclass_strategy=learner_pb2.LearnerConfig.FULL_HESSIAN,
        init_stamp_token=0)
    resources.initialize_resources(resources.shared_resources()).run()

    empty_gradients, empty_hessians = get_empty_tensors(
        gradient_shape, hessian_shape)
    example_weights = array_ops.ones([4, 1], dtypes.float32)

    update_1 = split_handler.update_stats_sync(
        0,
        partition_ids,
        gradients,
        hessians,
        empty_gradients,
        empty_hessians,
        example_weights,
        is_active=array_ops.constant([True, True]))
    # make_splits must only run after the statistics were accumulated.
    with ops.control_dependencies([update_1]):
      are_splits_ready, partitions, gains, splits = (
          split_handler.make_splits(0, 1, class_id))
      are_splits_ready, partitions, gains, splits = (sess.run(
          [are_splits_ready, partitions, gains, splits]))
    self.assertTrue(are_splits_ready)
    self.assertAllEqual([0, 1], partitions)

    split_info = split_info_pb2.SplitInfo()
    split_info.ParseFromString(splits[0])

    left_child = split_info.left_child.vector
    right_child = split_info.right_child.vector
    split_node = split_info.split_node.categorical_id_binary_split

    # Each leaf has 2 element vector.
    self.assertEqual(2, len(left_child.value))
    self.assertEqual(2, len(right_child.value))
    self.assertEqual(1, split_node.feature_id)

    split_info.ParseFromString(splits[1])
    left_child = split_info.left_child.vector
    right_child = split_info.right_child.vector
    split_node = split_info.split_node.categorical_id_binary_split
    self.assertEqual(2, len(left_child.value))
    self.assertEqual(0, len(right_child.value))
    self.assertEqual(1, split_node.feature_id)
def kernel_classifier_distance_and_std_from_activations(
    real_activations, generated_activations, max_block_size=10, dtype=None):
  """Kernel "classifier" distance for evaluating a generative model.

  This methods computes the kernel classifier distance from activations of
  real images and generated images. This can be used independently of the
  kernel_classifier_distance() method, especially in the case of using large
  batches during evaluation where we would like to precompute all of the
  activations before computing the classifier distance, or if we want to
  compute multiple metrics based on the same images. It also returns a rough
  estimate of the standard error of the estimator.

  This technique is described in detail in https://arxiv.org/abs/1801.01401.
  Given two distributions P and Q of activations, this function calculates

      E_{X, X' ~ P}[k(X, X')] + E_{Y, Y' ~ Q}[k(Y, Y')]
        - 2 E_{X ~ P, Y ~ Q}[k(X, Y)]

  where k is the polynomial kernel

      k(x, y) = ( x^T y / dimension + 1 )^3.

  This captures how different the distributions of real and generated images'
  visual features are. Like the Frechet distance (and unlike the Inception
  score), this is a true distance and incorporates information about the
  target images. Unlike the Frechet score, this function computes an
  *unbiased* and asymptotically normal estimator, which makes comparing
  estimates across models much more intuitive.

  The estimator used takes time quadratic in max_block_size. Larger values of
  max_block_size will decrease the variance of the estimator but increase the
  computational cost. This differs slightly from the estimator used by the
  original paper; it is the block estimator of https://arxiv.org/abs/1307.1954.
  The estimate of the standard error will also be more reliable when there are
  more blocks, i.e. when max_block_size is smaller.

  NOTE: the blocking code assumes that real_activations and
  generated_activations are both in random order. If either is sorted in a
  meaningful order, the estimator will behave poorly.

  Args:
    real_activations: 2D Tensor containing activations of real data. Shape is
      [batch_size, activation_size].
    generated_activations: 2D Tensor containing activations of generated data.
      Shape is [batch_size, activation_size].
    max_block_size: integer, default 10. The distance estimator splits samples
      into blocks for computational efficiency. Larger values are more
      computationally expensive but decrease the variance of the distance
      estimate. Having a smaller block size also gives a better estimate of the
      standard error.
    dtype: if not None, coerce activations to this dtype before computations.

  Returns:
    The Kernel Inception Distance. A floating-point scalar of the same type
      as the output of the activations.
    An estimate of the standard error of the distance estimator (a scalar of
      the same type). It is NaN when only a single block is used.

  Raises:
    AssertionError: if `dtype` is None and the two activation tensors do not
      share the same dtype.
  """
  real_activations.shape.assert_has_rank(2)
  generated_activations.shape.assert_has_rank(2)
  real_activations.shape[1].assert_is_compatible_with(
      generated_activations.shape[1])

  if dtype is None:
    dtype = real_activations.dtype
    assert generated_activations.dtype == dtype
  else:
    real_activations = math_ops.cast(real_activations, dtype)
    generated_activations = math_ops.cast(generated_activations, dtype)

  # Figure out how to split the activations into blocks of approximately
  # equal size, with none larger than max_block_size.
  n_r = array_ops.shape(real_activations)[0]
  n_g = array_ops.shape(generated_activations)[0]

  n_bigger = math_ops.maximum(n_r, n_g)
  n_blocks = math_ops.to_int32(math_ops.ceil(n_bigger / max_block_size))

  # Every block gets v_* samples; the last n_plusone_* blocks get one extra so
  # that all samples are used.
  v_r = n_r // n_blocks
  v_g = n_g // n_blocks

  n_plusone_r = n_r - v_r * n_blocks
  n_plusone_g = n_g - v_g * n_blocks

  sizes_r = array_ops.concat([
      array_ops.fill([n_blocks - n_plusone_r], v_r),
      array_ops.fill([n_plusone_r], v_r + 1),
  ], 0)
  sizes_g = array_ops.concat([
      array_ops.fill([n_blocks - n_plusone_g], v_g),
      array_ops.fill([n_plusone_g], v_g + 1),
  ], 0)

  # Block i covers rows inds[i]:inds[i + 1].
  zero = array_ops.zeros([1], dtype=dtypes.int32)
  inds_r = array_ops.concat([zero, math_ops.cumsum(sizes_r)], 0)
  inds_g = array_ops.concat([zero, math_ops.cumsum(sizes_g)], 0)

  # Use the module-level `array_ops` like the rest of this function instead of
  # reaching for a separate `tf` namespace.
  dim = math_ops.cast(array_ops.shape(real_activations)[1], dtype)

  def compute_kid_block(i):
    """Compute the ith block of the KID estimate."""
    r_s = inds_r[i]
    r_e = inds_r[i + 1]
    r = real_activations[r_s:r_e]
    m = math_ops.cast(r_e - r_s, dtype)

    g_s = inds_g[i]
    g_e = inds_g[i + 1]
    g = generated_activations[g_s:g_e]
    n = math_ops.cast(g_e - g_s, dtype)

    k_rr = (math_ops.matmul(r, r, transpose_b=True) / dim + 1)**3
    k_rg = (math_ops.matmul(r, g, transpose_b=True) / dim + 1)**3
    k_gg = (math_ops.matmul(g, g, transpose_b=True) / dim + 1)**3
    # The diagonal (k(x, x)) is excluded from the within-set averages, which
    # is what makes the estimate unbiased.
    return (-2 * math_ops.reduce_mean(k_rg) +
            (math_ops.reduce_sum(k_rr) - math_ops.trace(k_rr)) / (m * (m - 1)) +
            (math_ops.reduce_sum(k_gg) - math_ops.trace(k_gg)) / (n * (n - 1)))

  ests = functional_ops.map_fn(
      compute_kid_block, math_ops.range(n_blocks), dtype=dtype, back_prop=False)

  mn = math_ops.reduce_mean(ests)

  # nn_impl.moments doesn't use the Bessel correction, which we want here
  n_blocks_ = math_ops.cast(n_blocks, dtype)
  var = control_flow_ops.cond(
      math_ops.less_equal(n_blocks, 1),
      lambda: array_ops.constant(float('nan'), dtype=dtype),
      lambda: math_ops.reduce_sum(math_ops.square(ests - mn)) / (n_blocks_ - 1))

  return mn, math_ops.sqrt(var / n_blocks_)
def _event_shape_tensor(self):
  """Returns the event shape as an empty int32 vector tensor."""
  empty_shape = array_ops.constant([], dtype=dtypes.int32)
  return empty_shape
def update_confusion_matrix_variables(variables_to_update,
                                      y_true,
                                      y_pred,
                                      thresholds,
                                      top_k=None,
                                      class_id=None,
                                      sample_weight=None):
  """Returns op to update the given confusion matrix variables.

  For every pair of values in y_true and y_pred:

  true_positive: y_true == True and y_pred > thresholds
  false_negatives: y_true == True and y_pred <= thresholds
  true_negatives: y_true == False and y_pred <= thresholds
  false_positive: y_true == False and y_pred > thresholds

  The results will be weighted and added together. When multiple thresholds are
  provided, we will repeat the same for every threshold.

  For estimation of these metrics over a stream of data, the function creates an
  `update_op` operation that updates the given variables.

  If `sample_weight` is `None`, weights default to 1.
  Use weights of 0 to mask values.

  Args:
    variables_to_update: Dictionary with 'tp', 'fn', 'tn', 'fp' as valid keys
      and corresponding variables to update as values.
    y_true: A `Tensor` whose shape matches `y_pred`. Will be cast to `bool`.
    y_pred: A floating point `Tensor` of arbitrary shape and whose values are in
      the range `[0, 1]`.
    thresholds: A float value or a python list or tuple of float thresholds in
      `[0, 1]`, or NEG_INF (used when top_k is set).
    top_k: Optional int, indicates that the positive labels should be limited to
      the top k predictions.
    class_id: Optional int, limits the prediction and labels to the class
      specified by this argument.
    sample_weight: Optional `Tensor` whose rank is either 0, or the same rank as
      `y_true`, and must be broadcastable to `y_true` (i.e., all dimensions must
      be either `1`, or the same as the corresponding `y_true` dimension).

  Returns:
    Update op, or `None` when `variables_to_update` is `None`.

  Raises:
    ValueError: If `y_pred` and `y_true` have mismatched shapes, or if
      `sample_weight` is not `None` and its shape doesn't match `y_pred`, or if
      `variables_to_update` contains invalid keys.
  """
  if variables_to_update is None:
    return
  y_true = math_ops.cast(y_true, dtype=dtypes.float32)
  y_pred = math_ops.cast(y_pred, dtype=dtypes.float32)
  [y_pred, y_true], _ = ragged_assert_compatible_and_get_flat_values(
      [y_pred, y_true], sample_weight)
  y_pred.shape.assert_is_compatible_with(y_true.shape)

  # Enumerate the valid keys once; test membership directly rather than the
  # truthiness of whichever keys happen to match.
  valid_keys = list(ConfusionMatrix)
  if not any(key in valid_keys for key in variables_to_update):
    raise ValueError(
        'Please provide at least one valid confusion matrix '
        'variable to update. Valid variable key options are: "{}". '
        'Received: "{}"'.format(valid_keys, variables_to_update.keys()))

  invalid_keys = [key for key in variables_to_update if key not in valid_keys]
  if invalid_keys:
    raise ValueError(
        'Invalid keys: {}. Valid variable key options are: "{}"'.format(
            invalid_keys, valid_keys))

  # Ops created in this scope only run after the range checks on y_pred.
  with ops.control_dependencies([
      check_ops.assert_greater_equal(
          y_pred,
          math_ops.cast(0.0, dtype=y_pred.dtype),
          message='predictions must be >= 0'),
      check_ops.assert_less_equal(
          y_pred,
          math_ops.cast(1.0, dtype=y_pred.dtype),
          message='predictions must be <= 1')
  ]):
    if sample_weight is None:
      y_pred, y_true = tf_losses_utils.squeeze_or_expand_dimensions(
          y_pred, y_true)
    else:
      y_pred, y_true, sample_weight = (
          tf_losses_utils.squeeze_or_expand_dimensions(
              y_pred, y_true, sample_weight=sample_weight))

  if top_k is not None:
    y_pred = _filter_top_k(y_pred, top_k)
  if class_id is not None:
    y_true = y_true[..., class_id]
    y_pred = y_pred[..., class_id]

  thresholds = to_list(thresholds)
  num_thresholds = len(thresholds)
  num_predictions = array_ops.size(y_pred)

  # Reshape predictions and labels.
  predictions_2d = array_ops.reshape(y_pred, [1, -1])
  labels_2d = array_ops.reshape(
      math_ops.cast(y_true, dtype=dtypes.bool), [1, -1])

  # Tile the thresholds for every prediction.
  thresh_tiled = array_ops.tile(
      array_ops.expand_dims(array_ops.constant(thresholds), 1),
      array_ops.stack([1, num_predictions]))

  # Tile the predictions for every threshold.
  preds_tiled = array_ops.tile(predictions_2d, [num_thresholds, 1])

  # Compare predictions and threshold.
  pred_is_pos = math_ops.greater(preds_tiled, thresh_tiled)

  # Tile labels by number of thresholds
  label_is_pos = array_ops.tile(labels_2d, [num_thresholds, 1])

  if sample_weight is not None:
    weights = weights_broadcast_ops.broadcast_weights(
        math_ops.cast(sample_weight, dtype=dtypes.float32), y_pred)
    weights_tiled = array_ops.tile(
        array_ops.reshape(weights, [1, -1]), [num_thresholds, 1])
  else:
    weights_tiled = None

  update_ops = []

  def weighted_assign_add(label, pred, weights, var):
    """Adds the (optionally weighted) per-threshold count of label & pred."""
    label_and_pred = math_ops.cast(
        math_ops.logical_and(label, pred), dtype=dtypes.float32)
    if weights is not None:
      label_and_pred *= weights
    return var.assign_add(math_ops.reduce_sum(label_and_pred, 1))

  loop_vars = {
      ConfusionMatrix.TRUE_POSITIVES: (label_is_pos, pred_is_pos),
  }
  update_tn = ConfusionMatrix.TRUE_NEGATIVES in variables_to_update
  update_fp = ConfusionMatrix.FALSE_POSITIVES in variables_to_update
  update_fn = ConfusionMatrix.FALSE_NEGATIVES in variables_to_update

  # Only build the negated masks that some requested variable actually needs.
  if update_fn or update_tn:
    pred_is_neg = math_ops.logical_not(pred_is_pos)
    loop_vars[ConfusionMatrix.FALSE_NEGATIVES] = (label_is_pos, pred_is_neg)

  if update_fp or update_tn:
    label_is_neg = math_ops.logical_not(label_is_pos)
    loop_vars[ConfusionMatrix.FALSE_POSITIVES] = (label_is_neg, pred_is_pos)
    if update_tn:
      loop_vars[ConfusionMatrix.TRUE_NEGATIVES] = (label_is_neg, pred_is_neg)

  for matrix_cond, (label, pred) in loop_vars.items():
    if matrix_cond in variables_to_update:
      update_ops.append(
          weighted_assign_add(label, pred, weights_tiled,
                              variables_to_update[matrix_cond]))
  return control_flow_ops.group(update_ops)
def _conv_layer_params(self):
  """Returns fixed-seed random conv weights of shape (2, 2, 3, 4) and biases.

  The biases have one entry per element of the last weights dimension.
  """
  weights_shape = (2, 2, 3, 4)
  biases_shape = weights_shape[-1:]
  # Separate fixed seeds keep both tensors reproducible across runs.
  weight_values = npr.RandomState(0).randn(*weights_shape)
  bias_values = npr.RandomState(1).randn(*biases_shape)
  weights = array_ops.constant(weight_values)
  biases = array_ops.constant(bias_values)
  return (weights, biases)
def _fully_connected_layer_params(self):
  """Returns a fixed 2x2 weights constant and a 2-element bias constant."""
  weight_rows = [[1., 2.], [4., 3.]]
  bias_values = [1., 2.]
  weights_part = array_ops.constant(weight_rows)
  bias_part = array_ops.constant(bias_values)
  return (weights_part, bias_part)
def testObliviousFeatureSplitGeneration(self):
  """Checks oblivious-tree split generation over a categorical column."""
  tolerance = 0.00001
  with self.test_session() as sess:
    # The data looks like the following:
    # Example |  Gradients    | Partition | Feature ID     |
    # i0      |  (0.2, 0.12)  | 1         | 1              |
    # i1      |  (-0.5, 0.07) | 1         | 2              |
    # i2      |  (1.2, 0.2)   | 1         | 1              |
    # i3      |  (4.0, 0.13)  | 2         | 2              |
    gradients = array_ops.constant([0.2, -0.5, 1.2, 4.0])
    hessians = array_ops.constant([0.12, 0.07, 0.2, 0.13])
    partition_ids = [1, 1, 1, 2]
    indices = [[0, 0], [1, 0], [2, 0], [3, 0]]
    values = array_ops.constant([1, 2, 1, 2], dtype=dtypes.int64)

    gradient_shape = tensor_shape.scalar()
    hessian_shape = tensor_shape.scalar()
    class_id = -1

    sparse_column = sparse_tensor.SparseTensor(indices, values, [4, 1])
    split_handler = categorical_split_handler.EqualitySplitHandler(
        l1_regularization=0.1,
        l2_regularization=1,
        tree_complexity_regularization=0,
        min_node_weight=0,
        sparse_int_column=sparse_column,
        feature_column_group_id=0,
        gradient_shape=gradient_shape,
        hessian_shape=hessian_shape,
        multiclass_strategy=learner_pb2.LearnerConfig.TREE_PER_CLASS,
        init_stamp_token=0,
        weak_learner_type=learner_pb2.LearnerConfig.OBLIVIOUS_DECISION_TREE)
    resources.initialize_resources(resources.shared_resources()).run()

    empty_gradients, empty_hessians = get_empty_tensors(
        gradient_shape, hessian_shape)
    example_weights = array_ops.ones([4, 1], dtypes.float32)

    # The same batch of statistics is pushed twice before splitting.
    updates = [
        split_handler.update_stats_sync(
            0,
            partition_ids,
            gradients,
            hessians,
            empty_gradients,
            empty_hessians,
            example_weights,
            is_active=array_ops.constant([True, True]))
        for _ in range(2)
    ]

    with ops.control_dependencies(updates):
      are_splits_ready, partitions, gains, splits = (
          split_handler.make_splits(0, 1, class_id))
      are_splits_ready, partitions, gains, splits = (sess.run(
          [are_splits_ready, partitions, gains, splits]))
    self.assertTrue(are_splits_ready)
    self.assertAllEqual([1, 2], partitions)

    # For partition 1.
    # -(0.2 + 1.2 - 0.1) / (0.12 + 0.2 + 1)
    expected_left_weight1 = -0.9848484848484846
    # (0.2 + 1.2 - 0.1) ** 2 / (0.12 + 0.2 + 1)
    expected_left_gain1 = 1.2803030303030298

    # -(-0.5 + 0.1) / (0.07 + 1)
    expected_right_weight1 = 0.37383177570093457
    # (-0.5 + 0.1) ** 2 / (0.07 + 1)
    expected_right_gain1 = 0.14953271028037385

    # (0.2 + -0.5 + 1.2 - 0.1) ** 2 / (0.12 + 0.07 + 0.2 + 1)
    expected_bias_gain1 = 0.46043165467625885

    split_info = split_info_pb2.ObliviousSplitInfo()
    split_info.ParseFromString(splits[0])
    children = split_info.children
    split_node = split_info.split_node.oblivious_categorical_id_binary_split

    # Children of partition 1.
    left_child = children[0].vector
    right_child = children[1].vector

    self.assertEqual(0, split_node.feature_column)
    self.assertEqual(1, split_node.feature_id)
    self.assertAllClose([expected_left_weight1], left_child.value, tolerance)
    self.assertAllClose([expected_right_weight1], right_child.value, tolerance)

    # For partition2.
    expected_left_weight2 = 0
    expected_left_gain2 = 0
    # -(4 - 0.1) / (0.13 + 1)
    expected_right_weight2 = -3.4513274336283186
    # (4 - 0.1) ** 2 / (0.13 + 1)
    expected_right_gain2 = 13.460176991150442
    # (4 - 0.1) ** 2 / (0.13 + 1)
    expected_bias_gain2 = 13.460176991150442

    # Children of partition 2.
    left_child = children[2].vector
    right_child = children[3].vector

    self.assertAllClose([expected_left_weight2], left_child.value, tolerance)
    self.assertAllClose([expected_right_weight2], right_child.value, tolerance)

    # The reported gain is the sum of both partitions' gains.
    expected_total_gain = (
        expected_left_gain1 + expected_right_gain1 - expected_bias_gain1 +
        expected_left_gain2 + expected_right_gain2 - expected_bias_gain2)
    self.assertAllClose(expected_total_gain, gains[0], tolerance)
def testGenerateFeatureSplitCandidatesSumReduction(self):
  """Checks split candidates when the handler uses sum loss reduction."""
  tolerance = 0.00001
  with self.cached_session() as sess:
    # The data looks like the following:
    # Example |  Gradients    | Partition | Feature ID     |
    # i0      |  (0.2, 0.12)  | 0         | 1,2            |
    # i1      |  (-0.5, 0.07) | 0         |                |
    # i2      |  (1.2, 0.2)   | 0         | 2              |
    # i3      |  (4.0, 0.13)  | 1         | 1              |
    gradients = array_ops.constant([0.2, -0.5, 1.2, 4.0])
    hessians = array_ops.constant([0.12, 0.07, 0.2, 0.13])
    partition_ids = [0, 0, 0, 1]
    indices = [[0, 0], [0, 1], [2, 0], [3, 0]]
    values = array_ops.constant([1, 2, 2, 1], dtype=dtypes.int64)

    gradient_shape = tensor_shape.scalar()
    hessian_shape = tensor_shape.scalar()
    class_id = -1

    sparse_column = sparse_tensor.SparseTensor(indices, values, [4, 1])
    split_handler = categorical_split_handler.EqualitySplitHandler(
        l1_regularization=0.1,
        l2_regularization=1,
        tree_complexity_regularization=0,
        min_node_weight=0,
        sparse_int_column=sparse_column,
        feature_column_group_id=0,
        gradient_shape=gradient_shape,
        hessian_shape=hessian_shape,
        multiclass_strategy=learner_pb2.LearnerConfig.TREE_PER_CLASS,
        init_stamp_token=0,
        loss_uses_sum_reduction=True)
    resources.initialize_resources(resources.shared_resources()).run()

    empty_gradients, empty_hessians = get_empty_tensors(
        gradient_shape, hessian_shape)
    example_weights = array_ops.ones([4, 1], dtypes.float32)

    # The same batch is accumulated twice; the expected values below use the
    # doubled gradients and hessians.
    updates = [
        split_handler.update_stats_sync(
            0,
            partition_ids,
            gradients,
            hessians,
            empty_gradients,
            empty_hessians,
            example_weights,
            is_active=array_ops.constant([True, True]))
        for _ in range(2)
    ]
    with ops.control_dependencies(updates):
      are_splits_ready, partitions, gains, splits = (
          split_handler.make_splits(0, 1, class_id))
      are_splits_ready, partitions, gains, splits = (sess.run(
          [are_splits_ready, partitions, gains, splits]))
    self.assertTrue(are_splits_ready)
    self.assertAllEqual([0, 1], partitions)

    # Check the split on partition 0.
    # -(0.4 + 2.4 - 0.1) / (0.24 + 0.4 + 1)
    expected_left_weight = -1.6463414634146338

    # (0.4 + 2.4 - 0.1) ** 2 / (0.24 + 0.4 + 1)
    expected_left_gain = 4.445121951219511

    # -(-1 + 0.1) / (0.14 + 1)
    expected_right_weight = 0.789473684211

    # (-1 + 0.1) ** 2 / (0.14 + 1)
    expected_right_gain = 0.710526315789

    # (0.4 + -1 + 2.4 - 0.1) ** 2 / (0.24 + 0.14 + 0.4 + 1)
    expected_bias_gain = 1.6235955056179772

    first_split = split_info_pb2.SplitInfo()
    first_split.ParseFromString(splits[0])
    left_child = first_split.left_child.vector
    right_child = first_split.right_child.vector
    split_node = first_split.split_node.categorical_id_binary_split

    self.assertEqual(0, split_node.feature_column)
    self.assertEqual(2, split_node.feature_id)

    expected_gain = expected_left_gain + expected_right_gain - expected_bias_gain
    self.assertAllClose(expected_gain, gains[0], tolerance)
    self.assertAllClose([expected_left_weight], left_child.value, tolerance)
    self.assertAllClose([expected_right_weight], right_child.value, tolerance)

    # Check the split on partition 1.
    # (-8 + 0.1) / (0.26 + 1)
    expected_left_weight = -6.26984126984
    # (-8 + 0.1) ** 2 / (0.26 + 1)
    expected_left_gain = 49.5317460317
    expected_right_weight = 0
    expected_right_gain = 0
    # (-8 + 0.1) ** 2 / (0.26 + 1)
    expected_bias_gain = 49.5317460317

    # Verify candidate for partition 1, there's only one active feature here
    # so zero gain is expected.
    second_split = split_info_pb2.SplitInfo()
    second_split.ParseFromString(splits[1])
    left_child = second_split.left_child.vector
    right_child = second_split.right_child.vector
    split_node = second_split.split_node.categorical_id_binary_split

    self.assertAllClose(0.0, gains[1], tolerance)
    self.assertAllClose([expected_left_weight], left_child.value, tolerance)
    self.assertAllClose([expected_right_weight], right_child.value, tolerance)
    self.assertEqual(0, split_node.feature_column)
    self.assertEqual(1, split_node.feature_id)
def make_sparse(_):
  """Returns a fixed 2x2 SparseTensor with three int32 values.

  The single positional argument is ignored.
  """
  indices = array_ops.constant([[0, 0], [1, 0], [1, 1]], dtype=dtypes.int64)
  values = array_ops.constant([0, 0, 1], dtype=dtypes.int32)
  dense_shape = array_ops.constant([2, 2], dtype=dtypes.int64)
  return sparse_tensor.SparseTensor(
      indices=indices, values=values, dense_shape=dense_shape)
def run(self):
  """Returns the scalar constant 1.0 as a tensor."""
  result = array_ops.constant(1.0)
  return result
def load_keypoints_from_quantiles(feature_names,
                                  save_dir,
                                  num_keypoints,
                                  output_min,
                                  output_max,
                                  reversed_dict=None,
                                  dtype=dtypes.float32):
  """Retrieves keypoints initialization values for selected features.

  It expects that the quantiles have already been calculated and saved in the
  save_dir by the save_quantiles_for_keypoints function. It will raise
  an I/O error if not.

  Repeated quantile values are collapsed, so a feature with few distinct
  values yields fewer keypoints than requested.

  Args:
    feature_names: List of features names for which to get keypoints
      initialization values.
    save_dir: Directory where the quantiles have been saved to. Same value used
      when save_quantiles_for_keypoints was called.
    num_keypoints: Desired number of keypoints to use for calibration. This
      can either be a scalar to be used for all features, or a dict mapping
      feature name to num_keypoints. Fewer keypoints than requested can end
      up being used when for the given feature there are not enough different
      values. If num_keypoints for a feature is missing, None or 0, no
      initialization is generated.
    output_min: Initial calibrated value associated with the first calibration
      keypoint. The keypoints outputs in between will be linearly interpolated.
      It can be given as a scalar, in which case value is used for all
      features, or a dict mapping feature name to output_min.
    output_max: Like output_min, but the calibrated value associated to the
      last keypoint. Scalar or dict.
    reversed_dict: An optional dict. If reversed_dict[feature_name] is True,
      then the initial output keypoints will be in reversed order for that
      feature, i.e., input_min will be mapped to output_max, and input_max will
      be mapped to output_min. Reversing output keypoints is useful for
      decreasing monotonic calibrators.
    dtype: Type to be used for calibration.

  Returns:
    Dict of feature name to pair of constant tensors that can be used to
    initialize calibrators keypoints inputs and outputs.

  Raises:
    tf.errors.NotFoundError: if quantiles file not found.
  """
  subdir = os.path.join(save_dir, _QUANTILES_SUBDIRECTORY)
  # The last argument is the parameter *name* (as in the two calls below), not
  # the parameter value.
  num_keypoints = tools.cast_to_dict(num_keypoints, feature_names,
                                     "num_keypoints")
  output_min = tools.cast_to_dict_of_tensor_scalars(output_min, feature_names,
                                                    dtype, "output_min")
  output_max = tools.cast_to_dict_of_tensor_scalars(output_max, feature_names,
                                                    dtype, "output_max")

  keypoints = {}
  for feature_name in feature_names:
    # Missing, None or 0 keypoints means: no initialization for this feature.
    if feature_name not in num_keypoints or not num_keypoints[feature_name]:
      continue
    all_quantiles = _load_quantiles(subdir, feature_name)
    percentiles = np.linspace(0., 100., num_keypoints[feature_name])
    quantiles = np.percentile(
        all_quantiles, percentiles, interpolation="nearest")
    quantiles = sorted(set(quantiles))  # Remove repeated quantiles.
    input_kpts = array_ops.constant(
        quantiles, shape=[len(quantiles)], dtype=dtype)
    output_kpts = math_ops.linspace(output_min[feature_name],
                                    output_max[feature_name], len(quantiles))
    if reversed_dict is not None and reversed_dict[feature_name]:
      output_kpts = array_ops.reverse(output_kpts, axis=[0])
    keypoints[feature_name] = (input_kpts, output_kpts)
  return keypoints
def patches_to_images(self, grad, batch_size, rows_in, cols_in, channels,
                      rows_out, cols_out, ksize_r, ksize_c, stride_h,
                      stride_r):
  """Scatters extracted-patch values back onto the input image grid.

  Builds a sparse 0/1 matrix mapping every (patch, in-patch offset) position
  to the input pixel it was taken from, and multiplies it with the flattened
  `grad`, so overlapping patches are summed.

  Args:
    grad: Tensor reshapeable to
      (batch_size, rows_out, cols_out, ksize_r, ksize_c, channels).
    batch_size: Batch dimension of `grad` and of the result.
    rows_in: Number of rows of the input image.
    cols_in: Number of columns of the input image.
    channels: Channel dimension of `grad` and of the result.
    rows_out: Ignored; recomputed from the padding mode.
    cols_out: Ignored; recomputed from the padding mode.
    ksize_r: Patch height.
    ksize_c: Patch width.
    stride_h: Stride applied along columns.
    stride_r: Stride applied along rows.

  Returns:
    Tensor of shape (batch_size, rows_in, cols_in, channels).

  Raises:
    ValueError: If `self.pad` is neither 'SAME' nor 'VALID'.
  """
  # Dilation is not configurable here: rates are fixed to 1.
  rate_r = 1
  rate_c = 1
  padding = self.pad

  ksize_r_eff = ksize_r + (ksize_r - 1) * (rate_r - 1)
  ksize_c_eff = ksize_c + (ksize_c - 1) * (rate_c - 1)

  if padding == 'SAME':
    rows_out = int(ceil(rows_in / stride_r))
    cols_out = int(ceil(cols_in / stride_h))
    pad_rows = ((rows_out - 1) * stride_r + ksize_r_eff - rows_in) // 2
    pad_cols = ((cols_out - 1) * stride_h + ksize_c_eff - cols_in) // 2
  elif padding == 'VALID':
    rows_out = int(ceil((rows_in - ksize_r_eff + 1) / stride_r))
    cols_out = int(ceil((cols_in - ksize_c_eff + 1) / stride_h))
    pad_rows = (rows_out - 1) * stride_r + ksize_r_eff - rows_in
    pad_cols = (cols_out - 1) * stride_h + ksize_c_eff - cols_in
  else:
    # Fail with a clear message instead of an UnboundLocalError further down.
    raise ValueError(
        "Unsupported padding: {!r}. Expected 'SAME' or 'VALID'.".format(
            padding))

  pad_rows, pad_cols = max(0, pad_rows), max(0, pad_cols)

  # Move batch and channels last so each row of grad_flat is one
  # (patch, in-patch offset) position.
  grad_expanded = array_ops.transpose(
      array_ops.reshape(
          grad, (batch_size, rows_out, cols_out, ksize_r, ksize_c, channels)),
      (1, 2, 3, 4, 0, 5))
  grad_flat = array_ops.reshape(grad_expanded, (-1, batch_size * channels))

  row_steps = range(0, rows_out * stride_r, stride_r)
  col_steps = range(0, cols_out * stride_h, stride_h)

  # Pairs of (flat input pixel index, flat patch position index); positions
  # that fall into the padding are skipped.
  idx = []
  for i in range(rows_out):
    for j in range(cols_out):
      r_low, c_low = row_steps[i] - pad_rows, col_steps[j] - pad_cols
      r_high, c_high = r_low + ksize_r_eff, c_low + ksize_c_eff

      idx.extend([
          (r * (cols_in) + c, i * (cols_out * ksize_r * ksize_c) + j *
           (ksize_r * ksize_c) + ri * (ksize_c) + ci)
          for (ri, r) in enumerate(range(r_low, r_high, rate_r))
          for (ci, c) in enumerate(range(c_low, c_high, rate_c))
          if 0 <= r and r < rows_in and 0 <= c and c < cols_in
      ])

  sp_shape = (rows_in * cols_in, rows_out * cols_out * ksize_r * ksize_c)

  sp_mat = sparse_tensor.SparseTensor(
      array_ops.constant(idx, dtype=ops.dtypes.int64),
      array_ops.ones((len(idx),), dtype=ops.dtypes.float32), sp_shape)

  jac = sparse_ops.sparse_tensor_dense_matmul(sp_mat, grad_flat)

  grad_out = array_ops.reshape(jac, (rows_in, cols_in, batch_size, channels))
  grad_out = array_ops.transpose(grad_out, (2, 0, 1, 3))

  return grad_out
def _make_psd(dim):
  """Constructs a PSD matrix of the given dimension.

  The matrix is all ones off the diagonal, with diagonal entries
  2, 3, ..., dim + 1.
  """
  diag_idx = np.arange(dim)
  mat = np.ones((dim, dim), dtype=np.float32)
  mat[diag_idx, diag_idx] = 2. + diag_idx
  return array_ops.constant(mat)
def testComplexDiv(self):
  """Smoke test: a complex tensor can be divided by a real scalar."""
  foo = array_ops.constant([1. + 3.j])
  with self.test_session():
    # Only successful evaluation matters; the results are discarded.
    for div_fn, divisor in ((math_ops.divide, 1.), (math_ops.div, 2.)):
      _ = div_fn(foo, divisor).eval()
def _ExtractImagePatchesGrad(op, grad):
  """Gradient of the image-patch extraction op with respect to its input.

  Builds a sparse 0/1 matrix that maps every (patch, in-patch offset) position
  of the output to the input pixel it was read from, then multiplies it with
  the flattened incoming gradient so contributions of overlapping patches add
  up.

  Args:
    op: The forward op; its first input is the image batch and its attributes
      'ksizes', 'strides', 'rates' and 'padding' are read.
    grad: Gradient with respect to the op's output.

  Returns:
    A single-element list with the gradient with respect to the input images.
  """
  # Spatial sizes must be static; batch and channels are taken dynamically.
  _, rows_in, cols_in, _ = [dim.value for dim in op.inputs[0].get_shape()]
  input_bhwc = array_ops.shape(op.inputs[0])
  batch_size = input_bhwc[0]
  channels = input_bhwc[3]

  _, rows_out, cols_out, _ = [dim.value for dim in op.outputs[0].get_shape()]
  _, ksize_r, ksize_c, _ = op.get_attr('ksizes')
  _, stride_r, stride_h, _ = op.get_attr('strides')
  _, rate_r, rate_c, _ = op.get_attr('rates')
  padding = op.get_attr('padding')

  # Effective kernel extent once dilation is taken into account.
  ksize_r_eff = ksize_r + (ksize_r - 1) * (rate_r - 1)
  ksize_c_eff = ksize_c + (ksize_c - 1) * (rate_c - 1)

  if padding == b'SAME':
    rows_out = int(ceil(rows_in / stride_r))
    cols_out = int(ceil(cols_in / stride_h))
    pad_rows = ((rows_out - 1) * stride_r + ksize_r_eff - rows_in) // 2
    pad_cols = ((cols_out - 1) * stride_h + ksize_c_eff - cols_in) // 2

  elif padding == b'VALID':
    rows_out = int(ceil((rows_in - ksize_r_eff + 1) / stride_r))
    cols_out = int(ceil((cols_in - ksize_c_eff + 1) / stride_h))
    pad_rows = (rows_out - 1) * stride_r + ksize_r_eff - rows_in
    pad_cols = (cols_out - 1) * stride_h + ksize_c_eff - cols_in

  pad_rows = max(0, pad_rows)
  pad_cols = max(0, pad_cols)

  patch_shape = (batch_size, rows_out, cols_out, ksize_r, ksize_c, channels)
  grad_expanded = array_ops.transpose(
      array_ops.reshape(grad, patch_shape), (1, 2, 3, 4, 0, 5))
  grad_flat = array_ops.reshape(grad_expanded, (-1, batch_size * channels))

  row_steps = range(0, rows_out * stride_r, stride_r)
  col_steps = range(0, cols_out * stride_h, stride_h)

  # (flat input pixel index, flat patch position index) pairs; positions that
  # land in the padding are left out.
  patch_size = ksize_r * ksize_c
  idx = []
  for i, row_start in enumerate(row_steps):
    r_low = row_start - pad_rows
    for j, col_start in enumerate(col_steps):
      c_low = col_start - pad_cols
      patch_base = i * (cols_out * patch_size) + j * patch_size
      for ri, r in enumerate(range(r_low, r_low + ksize_r_eff, rate_r)):
        if r < 0 or r >= rows_in:
          continue
        for ci, c in enumerate(range(c_low, c_low + ksize_c_eff, rate_c)):
          if 0 <= c < cols_in:
            idx.append((r * cols_in + c, patch_base + ri * ksize_c + ci))

  sp_shape = (rows_in * cols_in, rows_out * cols_out * ksize_r * ksize_c)

  sp_indices = array_ops.constant(idx, dtype=ops.dtypes.int64)
  sp_values = array_ops.ones((len(idx),), dtype=ops.dtypes.float32)
  sp_mat = sparse_tensor.SparseTensor(sp_indices, sp_values, sp_shape)

  jac = sparse_ops.sparse_tensor_dense_matmul(sp_mat, grad_flat)

  grad_out = array_ops.transpose(
      array_ops.reshape(jac, (rows_in, cols_in, batch_size, channels)),
      (2, 0, 1, 3))

  return [grad_out]
def event_shape(self, name="event_shape"):
  """Returns an empty vector tensor with the dtype of `self._batch_shape`.

  Args:
    name: Name of the op scope the constant is created in.
  """
  with ops.name_scope(self.name):
    with ops.op_scope([self._batch_shape], name):
      shape_dtype = self._batch_shape.dtype
      return array_ops.constant([], dtype=shape_dtype)
def testDenseToSparseBatchDatasetWithInvalidShape(self):
  """A negative row-shape dimension must be rejected with a ValueError."""
  input_tensor = array_ops.constant([[1]])
  with self.assertRaisesRegexp(ValueError, "Dimension -2 must be >= 0"):
    dataset = dataset_ops.Dataset.from_tensors(input_tensor)
    batched = dataset.apply(batching.dense_to_sparse_batch(4, [-2]))
    batched.make_initializable_iterator()
def testDivideName(self):
  """The `name` argument of divide is used as the output tensor's name."""
  with self.test_session():
    numerator = array_ops.constant(3)
    denominator = array_ops.constant(4)
    op = math_ops.divide(numerator, denominator, name="my_cool_divide")
    self.assertEqual(op.name, "my_cool_divide:0")
def _finalize_func(unused_string_handle):
  """Returns an int64 zero constant; the argument is ignored."""
  zero = array_ops.constant(0, dtypes.int64)
  return zero
def _serving_input_receiver_fn():
  """Returns a `([1] constant, None)` pair."""
  features = array_ops.constant([1])
  return features, None
def _dynamic_rnn_loop(
    cell, inputs, initial_state, ff_keep_prob, recur_keep_prob,
    parallel_iterations, swap_memory, sequence_length=None):
  """Internal implementation of Dynamic RNN with input and state dropout.

  Args:
    cell: An instance of RNNCell.
    inputs: A `Tensor` of shape [time, batch_size, depth].
    initial_state: A `Tensor` of shape [batch_size, depth].
    ff_keep_prob: Keep probability for dropout on `inputs`. Dropout is applied
      when this is a `Tensor` or is less than 1; the noise shape is
      [1, batch_size, depth].
    recur_keep_prob: Keep probability for dropout on the recurrent state.
      When this is a `Tensor` or is less than 1, a mask of shape
      [batch_size, cell.output_size] is built once before the loop and the
      state is multiplied by it before every cell call; otherwise the state is
      multiplied by 1.
    parallel_iterations: Positive Python int.
    swap_memory: A Python boolean
    sequence_length: (optional) An `int32` `Tensor` of shape [batch_size].

  Returns:
    Tuple (final_outputs, final_state).
    final_outputs:
      A `Tensor` of shape [time, batch_size, depth].
    final_state:
      A `Tensor` of shape [batch_size, depth].

  Raises:
    ValueError: If the input depth cannot be inferred via shape inference
      from the inputs.
  """
  state = initial_state
  assert isinstance(parallel_iterations, int), "parallel_iterations must be int"

  # Construct an initial output
  input_shape = array_ops.shape(inputs)
  (time_steps, batch_size, _) = array_ops.unpack(input_shape, 3)

  # Static counterparts of the dynamic sizes above; entries may be None.
  inputs_got_shape = inputs.get_shape().with_rank(3)
  (const_time_steps, const_batch_size, const_depth) = inputs_got_shape.as_list()

  if const_depth is None:
    raise ValueError(
        "Input size (depth of inputs) must be accessible via shape inference, "
        "but saw value None.")

  # Prepare dynamic conditional copying of state & output
  zero_output = array_ops.zeros(
      array_ops.pack([batch_size, cell.output_size]), inputs.dtype)
  # min/max_sequence_length are only defined (and only read in `_time_step`)
  # when `sequence_length` is given.
  if sequence_length is not None:
    min_sequence_length = math_ops.reduce_min(sequence_length)
    max_sequence_length = math_ops.reduce_max(sequence_length)

  time = array_ops.constant(0, dtype=dtypes.int32, name="time")

  # The scope is opened only to obtain a name prefix for the TensorArrays.
  with ops.op_scope([], "dynamic_rnn") as scope:
    base_name = scope

  output_ta = tensor_array_ops.TensorArray(
      dtype=inputs.dtype, size=time_steps,
      tensor_array_name=base_name + "output")

  input_ta = tensor_array_ops.TensorArray(
      dtype=inputs.dtype, size=time_steps,
      tensor_array_name=base_name + "input")

  # Feed-forward dropout. The noise shape has size 1 on the time axis.
  if isinstance(ff_keep_prob, ops.Tensor) or ff_keep_prob < 1:
    inputs = nn_ops.dropout(inputs, ff_keep_prob, noise_shape=array_ops.pack([1, batch_size, const_depth]))
  input_ta = input_ta.unpack(inputs)

  # Recurrent dropout mask. The dropped-out block is placed last, after
  # (state_size // output_size - 1) blocks of ones, so only the trailing
  # `output_size` columns of the state are masked.
  # NOTE(review): presumably the hidden part of a concatenated [c, h] state --
  # confirm against the cells used with this loop.
  if isinstance(recur_keep_prob, ops.Tensor) or recur_keep_prob < 1:
    ones = array_ops.ones(array_ops.pack([batch_size, cell.output_size]), inputs.dtype)
    state_dropout = nn_ops.dropout(ones, recur_keep_prob)
    state_dropout = array_ops.concat(1, [ones] * (cell.state_size // cell.output_size - 1) + [state_dropout])
  else:
    state_dropout = 1.

  def _time_step(time, state, output_ta_t):
    """Take a time step of the dynamic RNN.

    Args:
      time: int32 scalar Tensor.
      state: Vector.
      output_ta_t: `TensorArray`, the output with existing flow.

    Returns:
      The tuple (time + 1, new_state, output_ta_t with updated flow).
    """

    input_t = input_ta.read(time)
    # Restore some shape information
    input_t.set_shape([const_batch_size, const_depth])

    # The cell sees the masked state; `_rnn_step` below receives the unmasked
    # `state`.
    call_cell = lambda: cell(input_t, state*state_dropout)

    if sequence_length is not None:
      (output, new_state) = _rnn_step(
          time=time,
          sequence_length=sequence_length,
          min_sequence_length=min_sequence_length,
          max_sequence_length=max_sequence_length,
          zero_output=zero_output,
          state=state,
          call_cell=call_cell,
          skip_conditionals=True)
    else:
      (output, new_state) = call_cell()

    output_ta_t = output_ta_t.write(time, output)

    return (time + 1, new_state, output_ta_t)

  (_, final_state, output_final_ta) = control_flow_ops.while_loop(
      cond=lambda time, _1, _2: time < time_steps,
      body=_time_step,
      loop_vars=(time, state, output_ta),
      parallel_iterations=parallel_iterations,
      swap_memory=swap_memory)

  final_outputs = output_final_ta.pack()
  # Restore some shape information
  final_outputs.set_shape([
      const_time_steps, const_batch_size, cell.output_size])

  return (final_outputs, final_state)
def _confusion_matrix_at_thresholds(labels, predictions, thresholds,
                                    weights=None):
  """Builds per-threshold confusion-matrix counts.

  Args:
    labels: Tensor that is cast to `bool`.
    predictions: Tensor that is asserted to lie in [0, 1] and cast to float.
    thresholds: Python sequence of thresholds (its `len` is taken).
    weights: Optional weights, broadcast against `predictions`; `None` leaves
      every entry unweighted.

  Returns:
    A dict with keys 'tp', 'fn', 'tn' and 'fp'. Each value is the sum over
    axis 1 of a [num_thresholds, num_predictions] indicator matrix, multiplied
    by the tiled weights when `weights` is given.
  """
  zero = math_ops.cast(0.0, dtype=predictions.dtype)
  not_below_zero = check_ops.assert_greater_equal(
      predictions, zero, message='predictions must be in [0, 1]')
  one = math_ops.cast(1.0, dtype=predictions.dtype)
  not_above_one = check_ops.assert_less_equal(
      predictions, one, message='predictions must be in [0, 1]')

  # The casts and the squeeze are created under the range assertions.
  with ops.control_dependencies([not_below_zero, not_above_one]):
    float_predictions = math_ops.to_float(predictions)
    bool_labels = math_ops.cast(labels, dtype=dtypes.bool)
    predictions, labels, weights = _remove_squeezable_dimensions(
        predictions=float_predictions, labels=bool_labels, weights=weights)

  num_thresholds = len(thresholds)

  # Predictions become a column, labels a row.
  predictions_2d = array_ops.reshape(predictions, [-1, 1])
  labels_2d = array_ops.reshape(
      math_ops.cast(labels, dtype=dtypes.bool), [1, -1])

  # Prefer the static number of predictions, falling back to the dynamic one.
  num_predictions = predictions_2d.get_shape().as_list()[0]
  if num_predictions is None:
    num_predictions = array_ops.shape(predictions_2d)[0]

  threshold_column = array_ops.expand_dims(array_ops.constant(thresholds), [1])
  thresh_tiled = array_ops.tile(
      threshold_column, array_ops.stack([1, num_predictions]))

  # One row of predictions per threshold, compared against that threshold.
  predictions_tiled = array_ops.tile(
      array_ops.transpose(predictions_2d), [num_thresholds, 1])
  pred_is_pos = math_ops.greater(predictions_tiled, thresh_tiled)
  pred_is_neg = math_ops.logical_not(pred_is_pos)
  label_is_pos = array_ops.tile(labels_2d, [num_thresholds, 1])
  label_is_neg = math_ops.logical_not(label_is_pos)

  weights_tiled = None
  if weights is not None:
    weights = weights_broadcast_ops.broadcast_weights(
        math_ops.to_float(weights), predictions)
    weights_tiled = array_ops.tile(
        array_ops.reshape(weights, [1, -1]), [num_thresholds, 1])
    thresh_tiled.get_shape().assert_is_compatible_with(
        weights_tiled.get_shape())

  def _count(label_mask, pred_mask):
    """Sums the (optionally weighted) entries where both masks hold."""
    hits = math_ops.to_float(math_ops.logical_and(label_mask, pred_mask))
    if weights_tiled is not None:
      hits = hits * weights_tiled
    return math_ops.reduce_sum(hits, 1)

  cells = (
      ('tp', label_is_pos, pred_is_pos),
      ('fn', label_is_pos, pred_is_neg),
      ('tn', label_is_neg, pred_is_neg),
      ('fp', label_is_neg, pred_is_pos),
  )
  values = {}
  for key, label_mask, pred_mask in cells:
    values[key] = _count(label_mask, pred_mask)
  return values
def testLastOneEmpty(self):
  """Only partition 0 yields a split when partition 1 has no feature ids."""
  with self.cached_session() as sess:
    # Input layout:
    # Example |  Gradients    | Partition | Feature ID |
    # i0      |  (0.2, 0.12)  | 0         | 1,2        |
    # i1      |  (-0.5, 0.07) | 0         |            |
    # i2      |  (1.2, 0.2)   | 0         | 2          |
    # i3      |  (4.0, 0.13)  | 1         |            |
    gradients = array_ops.constant([0.2, -0.5, 1.2, 4.0])
    hessians = array_ops.constant([0.12, 0.07, 0.2, 0.13])
    partition_ids = [0, 0, 0, 1]
    feature_indices = [[0, 0], [0, 1], [2, 0]]
    feature_ids = array_ops.constant([1, 2, 2], dtype=dtypes.int64)

    gradient_shape = tensor_shape.scalar()
    hessian_shape = tensor_shape.scalar()
    class_id = -1

    sparse_column = sparse_tensor.SparseTensor(
        feature_indices, feature_ids, [4, 1])
    split_handler = categorical_split_handler.EqualitySplitHandler(
        l1_regularization=0.1,
        l2_regularization=1,
        tree_complexity_regularization=0,
        min_node_weight=0,
        sparse_int_column=sparse_column,
        feature_column_group_id=0,
        gradient_shape=gradient_shape,
        hessian_shape=hessian_shape,
        multiclass_strategy=learner_pb2.LearnerConfig.TREE_PER_CLASS,
        init_stamp_token=0)
    resources.initialize_resources(resources.shared_resources()).run()

    empty_gradients, empty_hessians = get_empty_tensors(
        gradient_shape, hessian_shape)
    example_weights = array_ops.ones([4, 1], dtypes.float32)
    active_flags = array_ops.constant([True, True])

    stats_update = split_handler.update_stats_sync(
        0,
        partition_ids,
        gradients,
        hessians,
        empty_gradients,
        empty_hessians,
        example_weights,
        is_active=active_flags)
    with ops.control_dependencies([stats_update]):
      ready_op, partitions_op, gains_op, splits_op = (
          split_handler.make_splits(0, 1, class_id))
      are_splits_ready, partitions, gains, splits = sess.run(
          [ready_op, partitions_op, gains_op, splits_op])

  self.assertTrue(are_splits_ready)
  self.assertAllEqual([0], partitions)

  # Expected values for the split on partition 0.
  # -(0.2 + 1.2 - 0.1) / (0.12 + 0.2 + 1)
  expected_left_weight = -0.9848484848484846
  # (0.2 + 1.2 - 0.1) ** 2 / (0.12 + 0.2 + 1)
  expected_left_gain = 1.2803030303030298
  # -(-0.5 + 0.1) / (0.07 + 1)
  expected_right_weight = 0.37383177570093457
  # (-0.5 + 0.1) ** 2 / (0.07 + 1)
  expected_right_gain = 0.14953271028037385
  # (0.2 + -0.5 + 1.2 - 0.1) ** 2 / (0.12 + 0.07 + 0.2 + 1)
  expected_bias_gain = 0.46043165467625885
  expected_gain = expected_left_gain + expected_right_gain - expected_bias_gain

  split_info = split_info_pb2.SplitInfo()
  split_info.ParseFromString(splits[0])
  split_node = split_info.split_node.categorical_id_binary_split
  left_child = split_info.left_child.vector
  right_child = split_info.right_child.vector

  self.assertEqual(0, split_node.feature_column)
  self.assertEqual(2, split_node.feature_id)
  self.assertAllClose(expected_gain, gains[0], 0.00001)
  self.assertAllClose([expected_left_weight], left_child.value, 0.00001)
  self.assertAllClose([expected_right_weight], right_child.value, 0.00001)
def _dynamic_rnn_loop(
    cell, inputs, initial_state, parallel_iterations, swap_memory,
    sequence_length=None):
  """Internal implementation of Dynamic RNN.

  Args:
    cell: An instance of RNNCell.
    inputs: A `Tensor` of shape [time, batch_size, input_size]. Any static
      rank of at least 3 is accepted; every dimension after the first two is
      treated as input depth.
    initial_state: A `Tensor` of shape `[batch_size, state_size]`, or if
      `cell.state_size` is a tuple, then this should be a tuple of
      tensors having shapes `[batch_size, s] for s in cell.state_size`.
    parallel_iterations: Positive Python int.
    swap_memory: A Python boolean
    sequence_length: (optional) An `int32` `Tensor` of shape [batch_size].

  Returns:
    Tuple `(final_outputs, final_state)`.
    final_outputs:
      A `Tensor` of shape `[time, batch_size, cell.output_size]`.
    final_state:
      A `Tensor` matrix, or tuple of such matrices, matching in length
      and shapes to `initial_state`.

  Raises:
    ValueError: If the input depth cannot be inferred via shape inference
      from the inputs.
  """
  state = initial_state
  assert isinstance(parallel_iterations, int), "parallel_iterations must be int"

  # Construct an initial output
  input_shape = array_ops.shape(inputs)
  time_steps = input_shape[0]
  batch_size = input_shape[1]

  inputs_got_shape = inputs.get_shape().with_rank_at_least(3).as_list()
  const_time_steps = inputs_got_shape[0]
  const_batch_size = inputs_got_shape[1]
  const_depth = inputs_got_shape[2:]

  # `const_depth` is a list slice and therefore never None itself; an unknown
  # depth shows up as a None entry inside it.
  if any(dim is None for dim in const_depth):
    raise ValueError(
        "Input size (depth of inputs) must be accessible via shape inference, "
        "but saw value None.")

  # Prepare dynamic conditional copying of state & output
  zeros_size = _state_size_with_prefix(cell.output_size, prefix=[batch_size])
  zero_output = array_ops.zeros(array_ops.pack(zeros_size), inputs.dtype)
  # min/max_sequence_length are only defined (and only read in `_time_step`)
  # when `sequence_length` is given.
  if sequence_length is not None:
    min_sequence_length = math_ops.reduce_min(sequence_length)
    max_sequence_length = math_ops.reduce_max(sequence_length)

  time = array_ops.constant(0, dtype=dtypes.int32, name="time")

  state_size = cell.state_size
  state_is_tuple = nest.is_sequence(state_size)

  # The loop carries the state as a flat sequence of tensors so that it can be
  # appended to the other loop variables.
  state = nest.flatten(state) if state_is_tuple else (state,)

  # The scope is opened only to obtain a name prefix for the TensorArrays.
  with ops.op_scope([], "dynamic_rnn") as scope:
    base_name = scope

  output_ta = tensor_array_ops.TensorArray(
      dtype=inputs.dtype, size=time_steps,
      tensor_array_name=base_name + "output")

  input_ta = tensor_array_ops.TensorArray(
      dtype=inputs.dtype, size=time_steps,
      tensor_array_name=base_name + "input")

  input_ta = input_ta.unpack(inputs)

  def _time_step(time, output_ta_t, *state):
    """Take a time step of the dynamic RNN.

    Args:
      time: int32 scalar Tensor.
      output_ta_t: `TensorArray`, the output with existing flow.
      *state: List of vector tensors.

    Returns:
      The tuple (time + 1, output_ta_t with updated flow) + new_state.
    """

    input_t = input_ta.read(time)
    # Restore some shape information
    input_t.set_shape([const_batch_size] + const_depth)

    # Pack state back up for use by cell
    state = (nest.pack_sequence_as(structure=state_size, flat_sequence=state)
             if state_is_tuple else state[0])
    call_cell = lambda: cell(input_t, state)

    if sequence_length is not None:
      (output, new_state) = _rnn_step(
          time=time,
          sequence_length=sequence_length,
          min_sequence_length=min_sequence_length,
          max_sequence_length=max_sequence_length,
          zero_output=zero_output,
          state=state,
          call_cell=call_cell,
          state_size=state_size,
          skip_conditionals=True)
    else:
      (output, new_state) = call_cell()

    # Pack state if using state tuples
    new_state = (
        tuple(nest.flatten(new_state)) if state_is_tuple else (new_state,))

    output_ta_t = output_ta_t.write(time, output)

    return (time + 1, output_ta_t) + new_state

  final_loop_vars = control_flow_ops.while_loop(
      cond=lambda time, *_: time < time_steps,
      body=_time_step,
      loop_vars=(time, output_ta) + tuple(state),
      parallel_iterations=parallel_iterations,
      swap_memory=swap_memory)

  (output_final_ta, final_state) = (final_loop_vars[1], final_loop_vars[2:])

  final_outputs = output_final_ta.pack()
  # Restore some shape information
  final_outputs_size = _state_size_with_prefix(
      cell.output_size, prefix=[const_time_steps, const_batch_size])
  final_outputs.set_shape(final_outputs_size)

  # Unpack final state if not using state tuples.
  final_state = (
      nest.pack_sequence_as(
          structure=cell.state_size, flat_sequence=final_state)
      if state_is_tuple else final_state[0])

  return (final_outputs, final_state)
def sample(dim,
           num_results=None,
           sequence_indices=None,
           dtype=None,
           randomized=True,
           seed=None,
           name=None):
  r"""Returns a sample from the `dim` dimensional Halton sequence.

  Warning: The sequence elements take values only between 0 and 1. Care must be
  taken to appropriately transform the domain of a function if it differs from
  the unit cube before evaluating integrals using Halton samples. It is also
  important to remember that quasi-random numbers without randomization are not
  a replacement for pseudo-random numbers in every context. Quasi random numbers
  are completely deterministic and typically have significant negative
  autocorrelation unless randomization is used.

  Computes the members of the low discrepancy Halton sequence in dimension
  `dim`. The `dim`-dimensional sequence takes values in the unit hypercube in
  `dim` dimensions. Currently, only dimensions up to 1000 are supported. The
  prime base for the k-th axis is the k-th prime starting from 2. For example,
  if `dim` = 3, then the bases will be [2, 3, 5] respectively and the first
  element of the non-randomized sequence will be: [0.5, 0.333, 0.2]. For a more
  complete description of the Halton sequences see:
  https://en.wikipedia.org/wiki/Halton_sequence. For low discrepancy
  sequences and their applications see:
  https://en.wikipedia.org/wiki/Low-discrepancy_sequence.

  If `randomized` is true, this function produces a scrambled version of the
  Halton sequence introduced by Owen in arXiv:1706.02808. For the advantages of
  randomization of low discrepancy sequences see:
  https://en.wikipedia.org/wiki/Quasi-Monte_Carlo_method#Randomization_of_quasi-Monte_Carlo

  The number of samples produced is controlled by the `num_results` and
  `sequence_indices` parameters. The user must supply either `num_results` or
  `sequence_indices` but not both.
  The former is the number of samples to produce starting from the first
  element. If `sequence_indices` is given instead, the specified elements of
  the sequence are generated. For example, sequence_indices=tf.range(10) is
  equivalent to specifying num_results=10.

  Example Use:

  ```python
  bf = tf.contrib.bayesflow

  # Produce the first 1000 members of the Halton sequence in 3 dimensions.
  num_results = 1000
  dim = 3
  sample = bf.halton_sequence.sample(dim, num_results=num_results, seed=127)

  # Evaluate the integral of x_1 * x_2^2 * x_3^3  over the three dimensional
  # hypercube.
  powers = tf.range(1.0, limit=dim + 1)
  integral = tf.reduce_mean(tf.reduce_prod(sample ** powers, axis=-1))
  true_value = 1.0 / tf.reduce_prod(powers + 1.0)
  with tf.Session() as session:
    values = session.run((integral, true_value))

  # Produces a relative absolute error of 1.7%.
  print ("Estimated: %f, True Value: %f" % values)

  # Now skip the first 1000 samples and recompute the integral with the next
  # thousand samples. The sequence_indices argument can be used to do this.

  sequence_indices = tf.range(start=1000, limit=1000 + num_results,
                              dtype=tf.int32)
  sample_leaped = bf.halton_sequence.sample(
      dim, sequence_indices=sequence_indices, seed=111217)

  integral_leaped = tf.reduce_mean(tf.reduce_prod(sample_leaped ** powers,
                                                  axis=-1))
  with tf.Session() as session:
    values = session.run((integral_leaped, true_value))
  # Now produces a relative absolute error of 0.05%.
  print ("Leaped Estimated: %f, True Value: %f" % values)
  ```

  Args:
    dim: Positive Python `int` representing each sample's `event_size.` Must
      not be greater than 1000.
    num_results: (Optional) positive Python `int`. The number of samples to
      generate. Either this parameter or sequence_indices must be specified but
      not both. If this parameter is None, then the behaviour is determined by
      the `sequence_indices`.
    sequence_indices: (Optional) `Tensor` of dtype int32 and rank 1. The
      elements of the sequence to compute specified by their position in the
      sequence. The entries index into the Halton sequence starting with 0 and
      hence, must be whole numbers. For example, sequence_indices=[0, 5, 6] will
      produce the first, sixth and seventh elements of the sequence. If this
      parameter is None, then the `num_results` parameter must be specified
      which gives the number of desired samples starting from the first sample.
    dtype: (Optional) The dtype of the sample. One of `float32` or `float64`.
      Default is `float32`.
    randomized: (Optional) bool indicating whether to produce a randomized
      Halton sequence. If True, applies the randomization described in
      Owen (2017) [arXiv:1706.02808].
    seed: (Optional) Python integer to seed the random number generator. Only
      used if `randomized` is True. If not supplied and `randomized` is True,
      no seed is set.
    name:  (Optional) Python `str` describing ops managed by this function. If
      not supplied the name of this function is used.

  Returns:
    halton_elements: Elements of the Halton sequence. `Tensor` of supplied dtype
      and `shape` `[num_results, dim]` if `num_results` was specified or shape
      `[s, dim]` where s is the size of `sequence_indices` if `sequence_indices`
      were specified.

  Raises:
    ValueError: if both or neither of `sequence_indices` and `num_results`
      were specified, if dimension `dim` is less than 1 or greater than 1000,
      or if `dtype` is not a floating point type.
  """
  if dim < 1 or dim > _MAX_DIMENSION:
    raise ValueError(
        'Dimension must be between 1 and {}. Supplied {}'.format(
            _MAX_DIMENSION, dim))
  # Exactly one of the two must be given: both None or both set is an error.
  if (num_results is None) == (sequence_indices is None):
    raise ValueError('Either `num_results` or `sequence_indices` must be'
                     ' specified but not both.')

  dtype = dtype or dtypes.float32
  if not dtype.is_floating:
    raise ValueError('dtype must be of `float`-type')

  with ops.name_scope(name, 'sample', values=[sequence_indices]):
    # Here and in the following, the shape layout is as follows:
    # [sample dimension, event dimension, coefficient dimension].
    # The coefficient dimension is an intermediate axes which will hold the
    # weights of the starting integer when expressed in the (prime) base for
    # an event dimension.
    indices = _get_indices(num_results, sequence_indices, dtype)
    radixes = array_ops.constant(_PRIMES[0:dim], dtype=dtype, shape=[dim, 1])

    max_sizes_by_axes = _base_expansion_size(math_ops.reduce_max(indices),
                                             radixes)

    max_size = math_ops.reduce_max(max_sizes_by_axes)

    # The powers of the radixes that we will need. Note that there is a bit
    # of an excess here. Suppose we need the place value coefficients of 7
    # in base 2 and 3. For 2, we will have 3 digits but we only need 2 digits
    # for base 3. However, we can only create rectangular tensors so we
    # store both expansions in a [2, 3] tensor. This leads to the problem that
    # we might end up attempting to raise large numbers to large powers. For
    # example, base 2 expansion of 1024 has 10 digits. If we were in 10
    # dimensions, then the 10th prime (29) we will end up computing 29^10 even
    # though we don't need it. We avoid this by setting the exponents for each
    # axes to 0 beyond the maximum value needed for that dimension.
    exponents_by_axes = array_ops.tile([math_ops.range(max_size)], [dim, 1])

    # The mask is true for those coefficients that are irrelevant.
    weight_mask = exponents_by_axes >= max_sizes_by_axes

    capped_exponents = array_ops.where(
        weight_mask,
        array_ops.zeros_like(exponents_by_axes),
        exponents_by_axes)
    weights = radixes**capped_exponents
    # The following computes the base b expansion of the indices. Suppose,
    # x = a0 + a1*b + a2*b^2 + ... Then, performing a floor div of x with
    # the vector (1, b, b^2, b^3, ...) will produce
    # (a0 + s1 * b, a1 + s2 * b, ...) where s_i are coefficients we don't care
    # about. Noting that all a_i < b by definition of place value expansion,
    # we see that taking the elements mod b of the above vector produces the
    # place value expansion coefficients.
    coeffs = math_ops.floor_div(indices, weights)
    # Zero out the coefficients flagged as irrelevant by the mask.
    coeffs *= 1 - math_ops.cast(weight_mask, dtype)
    coeffs %= radixes
    if not randomized:
      # Plain Halton element: sum of digit / radix**(position + 1), reduced
      # over the coefficient axis.
      coeffs /= radixes
      return math_ops.reduce_sum(coeffs / weights, axis=-1)
    coeffs = _randomize(coeffs, radixes, seed=seed)
    # Remove the contribution from randomizing the trailing zero for the
    # axes where max_size_by_axes < max_size. This will be accounted
    # for separately below (using zero_correction).
    coeffs *= 1 - math_ops.cast(weight_mask, dtype)
    coeffs /= radixes
    base_values = math_ops.reduce_sum(coeffs / weights, axis=-1)

    # The randomization used in Owen (2017) does not leave 0 invariant. While
    # we have accounted for the randomization of the first `max_size_by_axes`
    # coefficients, we still need to correct for the trailing zeros. Luckily,
    # this is equivalent to adding a uniform random value scaled so the first
    # `max_size_by_axes` coefficients are zero. The following statements perform
    # this correction.
    # NOTE(review): the same `seed` value is passed here and to `_randomize`
    # above.
    zero_correction = random_ops.random_uniform([dim, 1], seed=seed,
                                                dtype=dtype)
    zero_correction /= (radixes**max_sizes_by_axes)
    return base_values + array_ops.reshape(zero_correction, [-1])
def conditional_batch_norm(inputs,
                           conditional_layer,
                           var_scope_postfix='',
                           decay=0.999,
                           center=True,
                           scale=False,
                           epsilon=0.001,
                           activation_fn=None,
                           param_initializers=None,
                           param_regularizers=None,
                           updates_collections=tf.GraphKeys.UPDATE_OPS,
                           is_training=True,
                           reuse=None,
                           variables_collections=None,
                           outputs_collections=None,
                           trainable=True,
                           data_format=DATA_FORMAT_NHWC,
                           zero_debias_moving_mean=False,
                           renorm=False,
                           renorm_clipping=None,
                           renorm_momentum=0.99,
                           scope=None):
  """Custom implementation of batch norm to support the optional
  `conditional_layer` and `var_scope_postfix`.

  For comments on the other parameters, see
  tensorflow.contrib.layers.python.layers.batch_norm, where this is copied
  from (tf 1.5 version).

  Args:
    inputs: A float32 tensor of rank 2 or 4 whose channel dimension is
      statically known. Rank-2 inputs are reshaped to rank 4 internally and
      the output is reshaped back.
    conditional_layer: A tensor with 2 dimensions [batch, channels]. If not
      None, the beta and gamma parameters will be conditioned on the
      `conditional_layer`. In that case `param_initializers` and
      `param_regularizers` must be empty whenever `center` or `scale` is set.
    var_scope_postfix: A string. Append it to the var scopes of all variables
      other than the weight and bias. e.g. var scope of the `gamma` variable
      becomes `'gamma' + var_scope_postfix`.
    renorm: If true, additional `renorm_*` variables are created and passed,
      together with `renorm_clipping` and `renorm_momentum`, to
      `_batch_norm_aux`.
    renorm_clipping: Optional dict; only the keys 'rmax', 'rmin' and 'dmax'
      are accepted.

  Returns:
    The result of `utils.collect_named_outputs` applied to the normalized
    (and, if `activation_fn` is given, activated) outputs.

  Raises:
    ValueError: If `data_format` is neither NCHW nor NHWC, if the rank of
      `inputs` or `conditional_layer` is undefined or unsupported, if the
      channel dimension is undefined, or if `renorm_clipping` contains an
      unknown key.
    NotImplementedError: If `inputs` is not float32.
  """
  if data_format not in (DATA_FORMAT_NCHW, DATA_FORMAT_NHWC):
    raise ValueError('data_format has to be either NCHW or NHWC.')
  if inputs.dtype != tf.float32:
    raise NotImplementedError(
        'This implementation may not be compatible with mixed precision training.'
    )
  with tf.variable_scope(scope, 'BatchNorm', [inputs], reuse=reuse) as sc:
    if conditional_layer is not None:
      conditional_layer = tf.convert_to_tensor(conditional_layer)
      # Normalizing the conditional layer seems to stabilize training a little.
      conditional_layer = tf.nn.l2_normalize(
          conditional_layer, dim=1, name='normalized_conditional_layer')
      conditional_layer_shape = conditional_layer.get_shape()
      conditional_layer_rank = conditional_layer_shape.ndims
      if conditional_layer_rank is None:
        raise ValueError('Conditional layer %s has undefined rank' %
                         conditional_layer.name)
      elif conditional_layer_rank != 2:
        raise ValueError('Conditional layer %s is not rank 2.' %
                         conditional_layer.name)

    inputs = tf.convert_to_tensor(inputs)
    original_shape = inputs.get_shape()
    original_inputs = inputs
    original_rank = original_shape.ndims
    if original_rank is None:
      raise ValueError('Inputs %s has undefined rank' % inputs.name)
    elif original_rank not in [2, 4]:
      raise ValueError('Inputs %s has unsupported rank.'
                       ' Expected 2 or 4 but got %d' % (inputs.name,
                                                        original_rank))
    if original_rank == 2:
      # Rank-2 inputs are viewed as rank 4 with unit spatial dimensions.
      channels = inputs.get_shape()[-1].value
      if channels is None:
        raise ValueError('`C` dimension must be known but is None')
      new_shape = [-1, 1, 1, channels]
      if data_format == DATA_FORMAT_NCHW:
        new_shape = [-1, channels, 1, 1]
      inputs = tf.reshape(inputs, new_shape)
    inputs_shape = inputs.get_shape()
    if data_format == DATA_FORMAT_NHWC:
      params_shape = inputs_shape[-1:]
    else:
      params_shape = inputs_shape[1:2]
    if not params_shape.is_fully_defined():
      raise ValueError('Inputs %s has undefined `C` dimension %s.' %
                       (inputs.name, params_shape))

    # Allocate parameters for the beta and gamma of the normalization.
    variable_dtype = inputs.dtype
    if not param_initializers:
      param_initializers = {}
    if not param_regularizers:
      param_regularizers = {}

    if center:
      beta_scope = 'beta' + var_scope_postfix
      if conditional_layer is not None:
        assert not param_initializers, 'param_initializers are not supported with conditional layer.'
        assert not param_regularizers, 'param_regularizers are not supported with conditional layer.'
        beta = get_conditional_batch_norm_param(
            conditional_layer, int(params_shape[-1]), scope=beta_scope)
      else:
        # Behaves like normal batch norm.
        beta_collections = utils.get_variable_collections(
            variables_collections, beta_scope)
        beta_initializer = param_initializers.get(beta_scope,
                                                  tf.zeros_initializer())
        # NOTE(review): the regularizer is looked up under the plain 'beta'
        # key while the initializer uses the postfixed scope name -- confirm
        # this asymmetry is intended.
        beta_regularizer = param_regularizers.get('beta')
        beta = variables.model_variable(
            beta_scope,
            shape=params_shape,
            dtype=variable_dtype,
            initializer=beta_initializer,
            regularizer=beta_regularizer,
            collections=beta_collections,
            trainable=trainable)
    else:
      beta = array_ops.constant(0.0, dtype=variable_dtype, shape=params_shape)

    if scale:
      gamma_scope = 'gamma' + var_scope_postfix
      if conditional_layer is not None:
        assert not param_initializers, 'param_initializers are not supported with conditional layer.'
        assert not param_regularizers, 'param_regularizers are not supported with conditional layer.'
        delta_gamma = get_conditional_batch_norm_param(
            conditional_layer, int(params_shape[-1]), scope=gamma_scope)
        # Per https://arxiv.org/pdf/1707.03017.pdf.
        gamma = tf.constant(
            1.0,
            dtype=variable_dtype,
        ) + delta_gamma
      else:
        gamma_collections = utils.get_variable_collections(
            variables_collections, gamma_scope)
        gamma_initializer = param_initializers.get(gamma_scope,
                                                   tf.ones_initializer())
        # NOTE(review): same plain-key lookup as for beta above.
        gamma_regularizer = param_regularizers.get('gamma')
        gamma = variables.model_variable(
            gamma_scope,
            shape=params_shape,
            dtype=variable_dtype,
            initializer=gamma_initializer,
            regularizer=gamma_regularizer,
            collections=gamma_collections,
            trainable=trainable)
    else:
      gamma = tf.constant(1.0, dtype=variable_dtype, shape=params_shape)

    # Create moving_mean and moving_variance variables and add them to the
    # appropriate collections. We disable variable partitioning while creating
    # them, because assign_moving_average is not yet supported for partitioned
    # variables (this needs to be handled carefully, as it may break
    # the checkpoint backward compatibility).
    with tf.variable_scope(tf.get_variable_scope()) as local_scope:
      local_scope.set_partitioner(None)
      moving_mean_scope = 'moving_mean' + var_scope_postfix
      moving_mean_collections = utils.get_variable_collections(
          variables_collections, moving_mean_scope)
      moving_mean_initializer = param_initializers.get(
          moving_mean_scope, tf.zeros_initializer())
      moving_mean = variables.model_variable(
          moving_mean_scope,
          shape=params_shape,
          dtype=tf.float32,
          initializer=moving_mean_initializer,
          trainable=False,
          collections=moving_mean_collections)
      moving_variance_scope = 'moving_variance' + var_scope_postfix
      moving_variance_collections = utils.get_variable_collections(
          variables_collections, moving_variance_scope)
      moving_variance_initializer = param_initializers.get(
          moving_variance_scope, tf.ones_initializer())
      moving_variance = variables.model_variable(
          moving_variance_scope,
          shape=params_shape,
          dtype=tf.float32,
          initializer=moving_variance_initializer,
          trainable=False,
          collections=moving_variance_collections)

    # NOTE(review): the original indentation was lost, so whether this block
    # sits inside the partitioner-disabled scope above could not be
    # determined -- confirm.
    if renorm:
      renorm_clipping = renorm_clipping or {}
      keys = ['rmax', 'rmin', 'dmax']
      if set(renorm_clipping) - set(keys):
        raise ValueError('renorm_clipping %s contains keys not in %s' %
                         (renorm_clipping, keys))

      # Create variables to maintain the moving mean and standard deviation.
      # These are used in training and thus are different from the moving
      # averages above. The renorm variables are colocated with moving_mean
      # and moving_variance.
      # NOTE: below, the outer `with device` block causes the current device
      # stack to be cleared. The nested ones use a `lambda` to set the desired
      # device and ignore any devices that may be set by the custom getter.
      def _renorm_variable(name, shape):
        var = variables.model_variable(
            name=name,  # renorm variable should be dependent on var_scope_postfix.
            shape=shape,
            dtype=tf.float32,
            initializer=param_initializers.get(name, tf.zeros_initializer()),
            trainable=False)
        return var

      with ops.device(None):
        # Device functions are rejected under eager execution, so the
        # `lambda` form is used only when building a graph and the plain
        # device string otherwise.
        device = (moving_mean.device if context.executing_eagerly()
                  else (lambda _: moving_mean.device))
        with ops.device(device):
          renorm_mean = _renorm_variable('renorm_mean' + var_scope_postfix,
                                         params_shape)
          renorm_mean_weight = _renorm_variable(
              'renorm_mean_weight' + var_scope_postfix, ())
        # We initialize renorm_stddev to 0, and maintain the (0-initialized)
        # renorm_stddev_weight. This allows us to (1) mix the average
        # stddev with the minibatch stddev early in training, and (2) compute
        # the unbiased average stddev by dividing renorm_stddev by the weight.
        device = (moving_variance.device if context.executing_eagerly()
                  else (lambda _: moving_variance.device))
        with ops.device(device):
          renorm_stddev = _renorm_variable('renorm_stddev' + var_scope_postfix,
                                           params_shape)
          renorm_stddev_weight = _renorm_variable(
              'renorm_stddev_weight' + var_scope_postfix, ())

      class dotdict(dict):
        """dot.notation access to dictionary attributes"""
        __getattr__ = dict.get
        __setattr__ = dict.__setitem__
        __delattr__ = dict.__delitem__

      renorm_params = dotdict({
          'renorm_mean': renorm_mean,
          'renorm_mean_weight': renorm_mean_weight,
          'renorm_stddev': renorm_stddev,
          'renorm_stddev_weight': renorm_stddev_weight,
          'renorm_clipping': renorm_clipping,
          'renorm_momentum': renorm_momentum,
          'moving_mean': moving_mean,
          'moving_variance': moving_variance,
          'epsilon': epsilon
      })
    else:
      renorm_params = None

    def _batch_norm_training():
      # return tf.nn.fused_batch_norm(
      return _batch_norm_aux(
          inputs,
          gamma,
          beta,
          epsilon=epsilon,
          data_format=data_format,
          renorm=renorm,
          renorm_params=renorm_params)

    def _batch_norm_inference():
      # return tf.nn.fused_batch_norm(
      return _batch_norm_aux(
          inputs,
          gamma,
          beta,
          mean=tf.cast(moving_mean, dtype=variable_dtype),
          variance=tf.cast(moving_variance, dtype=variable_dtype),
          epsilon=epsilon,
          is_training=False,
          data_format=data_format,
          renorm=renorm,
          renorm_params=renorm_params)

    outputs, mean, variance = utils.smart_cond(
        is_training, _batch_norm_training, _batch_norm_inference)

    # If `is_training` doesn't have a constant value, because it is a `Tensor`,
    # a `Variable` or `Placeholder` then is_training_value will be None and
    # `need_updates` will be true.
    is_training_value = utils.constant_value(is_training)
    need_updates = is_training_value is None or is_training_value
    if need_updates:
      if updates_collections is None:
        no_updates = lambda: outputs

        def _force_updates():
          """Internal function forces updates moving_vars if is_training."""
          update_moving_mean = moving_averages.assign_moving_average(
              moving_mean, mean, decay, zero_debias=zero_debias_moving_mean)
          update_moving_variance = moving_averages.assign_moving_average(
              moving_variance, variance, decay, zero_debias=False)
          with tf.control_dependencies(
              [update_moving_mean, update_moving_variance]):
            return tf.identity(outputs)

        outputs = utils.smart_cond(is_training, _force_updates, no_updates)
      else:
        moving_vars_fn = lambda: (moving_mean, moving_variance)

        def _delay_updates():
          """Internal function that delay updates moving_vars if is_training."""
          update_moving_mean = moving_averages.assign_moving_average(
              moving_mean,
              tf.cast(mean, dtype=moving_mean.dtype),
              decay,
              zero_debias=zero_debias_moving_mean)
          update_moving_variance = moving_averages.assign_moving_average(
              moving_variance,
              tf.cast(variance, dtype=moving_variance.dtype),
              decay,
              zero_debias=False)
          return update_moving_mean, update_moving_variance

        update_mean, update_variance = utils.smart_cond(
            is_training, _delay_updates, moving_vars_fn)
        ops.add_to_collections(updates_collections, update_mean)
        ops.add_to_collections(updates_collections, update_variance)

    outputs.set_shape(inputs_shape)
    if original_shape.ndims == 2:
      # Undo the rank-4 view created for rank-2 inputs.
      outputs = array_ops.reshape(outputs, array_ops.shape(original_inputs))
    if activation_fn is not None:
      outputs = activation_fn(outputs)
    return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
def leapfrog_integrator(step_size, n_steps, initial_position, initial_momentum,
                        potential_and_grad, initial_grad, name=None):
  """Runs the leapfrog integrator for `n_steps` consecutive steps.

  Calls `leapfrog_step()` repeatedly inside a `tf.while_loop()`. The gradient
  returned by each step is carried in the loop state and handed to the next
  step, so gradient computations are reused where possible. The loop is built
  with `back_prop=False`.

  Args:
    step_size: Scalar step size or array of step sizes for the
      leapfrog integrator. Broadcasts to the shape of
      `initial_position`. Larger step sizes lead to faster progress, but
      too-large step sizes lead to larger discretization error and
      worse energy conservation.
    n_steps: Number of leapfrog steps to take.
    initial_position: Tensor holding the starting value(s) of the position
      variable(s).
    initial_momentum: Tensor holding the starting value(s) of the momentum
      variable(s).
    potential_and_grad: Python callable that accepts a position tensor like
      `initial_position` and returns the potential energy together with its
      gradient at that position.
    initial_grad: Tensor holding the gradient of the potential energy
      evaluated at `initial_position`.
    name: Python `str` name prefixed to Ops created by this function.

  Returns:
    updated_position: Position after the final step.
    updated_momentum: Momentum after the final step.
    new_potential: Potential energy at the updated position. Has shape
      matching `potential_and_grad(initial_position)`.
    new_grad: Gradient from potential_and_grad() evaluated at the updated
      position. Has shape matching `initial_position`.

  Example: Simple quadratic potential.

  ```python
  def potential_and_grad(position):
    return tf.reduce_sum(0.5 * tf.square(position)), position
  position = tf.placeholder(np.float32)
  momentum = tf.placeholder(np.float32)
  potential, grad = potential_and_grad(position)
  new_position, new_momentum, new_potential, new_grad = (
      hmc.leapfrog_integrator(
          0.1, 3, position, momentum, potential_and_grad, grad))

  sess = tf.Session()
  position_val = np.random.randn(10)
  momentum_val = np.random.randn(10)
  potential_val, grad_val = sess.run([potential, grad],
                                     {position: position_val})
  positions = np.zeros([100, 10])
  for i in range(100):
    position_val, momentum_val, potential_val, grad_val = sess.run(
        [new_position, new_momentum, new_potential, new_grad],
        {position: position_val, momentum: momentum_val})
    positions[i] = position_val
  # Should trace out sinusoidal dynamics.
  plt.plot(positions[:, 0])
  ```
  """

  def _advance(eps, position, momentum, gradient, steps_done):
    """Loop body: takes one leapfrog step and bumps the step counter."""
    # The third value returned by `leapfrog_step` is not part of the loop
    # state, so it is dropped here.
    next_position, next_momentum, _, next_gradient = leapfrog_step(
        eps, position, momentum, potential_and_grad, gradient)
    return eps, next_position, next_momentum, next_gradient, steps_done + 1

  def _steps_remaining(unused_eps, unused_position, unused_momentum,
                       unused_gradient, steps_done):
    """Loop condition: true until `n_steps` steps have been taken."""
    return steps_done < n_steps

  scope_values = [
      step_size, n_steps, initial_position, initial_momentum, initial_grad
  ]
  with ops.name_scope(name, 'leapfrog_integrator', scope_values):
    # The step counter is created inside the name scope, ahead of the loop.
    loop_state = [
        step_size,
        initial_position,
        initial_momentum,
        initial_grad,
        array_ops.constant(0),
    ]
    _, final_position, final_momentum, _, _ = control_flow_ops.while_loop(
        _steps_remaining, _advance, loop_state, back_prop=False)
    # The loop state does not carry the potential energy, so evaluate it at
    # the final position. This also recomputes the gradient there; we are
    # counting on the runtime to eliminate that redundant computation.
    final_potential, final_grad = potential_and_grad(final_position)
  return final_position, final_momentum, final_potential, final_grad