def test_with_invalid_value_range(self):
  """Checks that a malformed `value_range` is rejected with `ValueError`.

  `value_range` is passed once as a scalar and once as a 3-element list; each
  call is expected to raise with the message asserted below.
  """
  values = [-1.0, 0.0, 1.5, 2.0, 5.0, 15]
  # `assertRaisesRegex` replaces the deprecated `assertRaisesRegexp` alias,
  # which was removed from `unittest` in Python 3.12.
  with self.assertRaisesRegex(
      ValueError, "Shape must be rank 1 but is rank 0"):
    # A scalar is given where a two-element range is expected.
    histogram_ops.histogram_fixed_width(values, 1.0)
  with self.assertRaisesRegex(ValueError, "Dimension must be 2 but is 3"):
    # Three entries are given where a two-element range is expected.
    histogram_ops.histogram_fixed_width(values, [1.0, 2.0, 3.0])
def test_with_invalid_nbins(self):
  """Verifies that a list-valued or negative `nbins` raises `ValueError`."""
  samples = [-1.0, 0.0, 1.5, 2.0, 5.0, 15]
  bounds = [1.0, 5.0]

  # A list is passed where a single bin count is expected.
  with self.assertRaisesRegex(ValueError,
                              "Shape must be rank 0 but is rank 1"):
    histogram_ops.histogram_fixed_width(samples, bounds, nbins=[1, 2])

  # A negative bin count is passed.
  with self.assertRaisesRegex(ValueError, "Requires nbins > 0"):
    histogram_ops.histogram_fixed_width(samples, bounds, nbins=-5)
def test_with_invalid_nbins(self):
  """Checks that an invalid `nbins` is rejected with `ValueError`.

  `nbins` is passed once as a two-element list and once as a negative
  integer; each call is expected to raise with the message asserted below.
  """
  values = [-1.0, 0.0, 1.5, 2.0, 5.0, 15]
  # `assertRaisesRegex` replaces the deprecated `assertRaisesRegexp` alias,
  # which was removed from `unittest` in Python 3.12.
  with self.assertRaisesRegex(
      ValueError, "Shape must be rank 0 but is rank 1"):
    # A list is given where a single bin count is expected.
    histogram_ops.histogram_fixed_width(values, [1.0, 5.0], nbins=[1, 2])
  with self.assertRaisesRegex(ValueError, "Requires nbins > 0"):
    # A negative bin count is given.
    histogram_ops.histogram_fixed_width(values, [1.0, 5.0], nbins=-5)
def test_two_updates_on_constant_input(self):
  """Feeds two batches through one update op and checks the running counts."""
  # Bins will be:
  #   (-inf, 1), [1, 2), [2, 3), [3, 4), [4, inf)
  nbins = [5]
  value_range = [0.0, 5.0]
  # Each entry pairs a batch to feed with the counts expected once that batch
  # has been added on top of everything fed before it.
  batches = [
      ([-1.0, 0.0, 1.5, 2.0, 5.0, 15], [2, 1, 1, 0, 2]),
      ([1.5, 4.5, 4.5, 4.5, 0.0, 0.0], [4, 2, 1, 0, 5]),
  ]
  with self.test_session() as sess:
    hist = variables.Variable(array_ops.zeros(nbins, dtype=dtypes.int32))
    new_values = array_ops.placeholder(dtypes.float32, shape=[6])
    hist_update = histogram_ops.histogram_fixed_width(hist, new_values,
                                                      value_range)
    variables.initialize_all_variables().run()

    for batch, expected_counts in batches:
      # The updating op returns the new histogram, and the variable itself
      # should hold the same values afterwards.
      updated_hist_array = sess.run(hist_update,
                                    feed_dict={new_values: batch})
      self.assertAllClose(expected_counts, updated_hist_array)
      self.assertAllClose(expected_counts, hist.eval())
def test_multiple_random_accumulating_updates_results_in_right_dist(self):
  """Accumulates histograms of uniform samples and expects a flat pmf."""
  # The per-batch histograms are summed into a separate variable; the total
  # should be uniform. Only 3 bins are used because with many bins it would be
  # unlikely that all of them land close to 1/n. If someone ever wants to test
  # that, it would be better to check that the cdf is linear.
  low, high = 1.0, 4.14159
  value_range = [low, high]
  with self.test_session() as sess:
    values = array_ops.placeholder(dtypes.float32, shape=[4, 4, 4])
    hist = histogram_ops.histogram_fixed_width(
        values, value_range, nbins=3, dtype=dtypes.int64)
    accumulator = variables.Variable(
        init_ops.zeros_initializer()([3], dtype=dtypes.int64))
    accumulate_op = accumulator.assign_add(hist)
    variables.global_variables_initializer().run()

    for _ in range(100):
      # Map the rv: U[0, 1] --> U[low, high].
      uniform_draw = self.rng.rand(4, 4, 4)
      values_arr = low + (high - low) * uniform_draw
      hist_accum_arr = sess.run(accumulate_op, feed_dict={values: values_arr})

    total_count = float(hist_accum_arr.sum())
    pmf = hist_accum_arr / total_count
    np.testing.assert_allclose(1 / 3, pmf, atol=0.02)
def test_multiple_random_3d_updates_results_in_right_dist(self):
  """Repeatedly updates a histogram with uniform 3-D samples; expects a flat pmf."""
  # Only 3 bins are used because with many bins it would be unlikely that all
  # of them land close to 1/n. If someone ever wants to test that, it would
  # be better to check that the cdf is linear.
  nbins = [3]
  low, high = 1.0, 4.14159
  value_range = [low, high]
  with self.test_session() as sess:
    hist = variables.Variable(array_ops.zeros(nbins, dtype=dtypes.int32))
    new_values = array_ops.placeholder(dtypes.float32, shape=[4, 4, 4])
    hist_update = histogram_ops.histogram_fixed_width(hist, new_values,
                                                      value_range)
    variables.initialize_all_variables().run()

    for _ in range(100):
      # Map the rv: U[0, 1] --> U[low, high].
      uniform_draw = self.rng.rand(4, 4, 4)
      new_values_arr = low + (high - low) * uniform_draw
      # The updating op returns the new histogram array.
      updated_hist_array = sess.run(hist_update,
                                    feed_dict={new_values: new_values_arr})

    total_count = float(updated_hist_array.sum())
    pmf = updated_hist_array / total_count
    np.testing.assert_allclose(1 / 3, pmf, atol=0.02)
def test_shape_inference(self):
  """Checks the histogram's static shape for a fixed and a fed `nbins`.

  With `nbins=5` the static shape is asserted to be `[5]`. With `nbins` given
  as a placeholder the result is asserted to be rank 1 with an unknown size.
  Both variants are expected to evaluate to the same counts.
  """
  value_range = [0.0, 5.0]
  values = [[-1.0, 0.0, 1.5], [2.0, 5.0, 15]]
  expected_bin_counts = [2, 1, 1, 0, 2]
  placeholder = array_ops.placeholder(dtypes.int32)
  with self.session(use_gpu=True):
    # Python-int `nbins`.
    hist = histogram_ops.histogram_fixed_width(values, value_range, nbins=5)
    self.assertAllEqual(hist.shape.as_list(), (5,))
    self.assertEqual(dtypes.int32, hist.dtype)
    self.assertAllClose(expected_bin_counts, self.evaluate(hist))

    # `nbins` supplied through a placeholder and fed at evaluation time.
    hist = histogram_ops.histogram_fixed_width(
        values, value_range, nbins=placeholder)
    # `assertEqual` replaces the deprecated `assertEquals` alias, which was
    # removed from `unittest` in Python 3.12.
    self.assertEqual(hist.shape.ndims, 1)
    self.assertIsNone(hist.shape.dims[0].value)
    self.assertEqual(dtypes.int32, hist.dtype)
    self.assertAllClose(expected_bin_counts, hist.eval({placeholder: 5}))
def test_1d_float64_values(self):
  """Histograms float64 inputs and expects int32 counts."""
  # Bins will be:
  #   (-inf, 1), [1, 2), [2, 3), [3, 4), [4, inf)
  samples = np.float64([-1.0, 0.0, 1.5, 2.0, 5.0, 15])
  bounds = np.float64([0.0, 5.0])
  with self.test_session(use_gpu=True):
    hist = histogram_ops.histogram_fixed_width(samples, bounds, nbins=5)
    self.assertEqual(dtypes.int32, hist.dtype)
    counts = hist.eval()
    self.assertAllClose([2, 1, 1, 0, 2], counts)
def test_2d_values(self):
  """Histograms a 2x3 nested-list input and checks dtype and counts."""
  # Bins will be:
  #   (-inf, 1), [1, 2), [2, 3), [3, 4), [4, inf)
  rows = [[-1.0, 0.0, 1.5], [2.0, 5.0, 15]]
  bounds = [0.0, 5.0]
  with self.session(use_gpu=True):
    hist = histogram_ops.histogram_fixed_width(rows, bounds, nbins=5)
    self.assertEqual(dtypes.int32, hist.dtype)
    counts = self.evaluate(hist)
    self.assertAllClose([2, 1, 1, 0, 2], counts)
def test_empty_input_gives_all_zero_counts(self):
  """An empty input list should produce five zero counts of dtype int32."""
  # Bins will be:
  #   (-inf, 1), [1, 2), [2, 3), [3, 4), [4, inf)
  bounds = [0.0, 5.0]
  no_samples = []
  with self.session(use_gpu=True):
    hist = histogram_ops.histogram_fixed_width(no_samples, bounds, nbins=5)
    self.assertEqual(dtypes.int32, hist.dtype)
    counts = self.evaluate(hist)
    self.assertAllClose([0, 0, 0, 0, 0], counts)
def test_1d_values_int64_output(self):
  """Requests int64 counts for a 1-D input and checks dtype and values."""
  # Bins will be:
  #   (-inf, 1), [1, 2), [2, 3), [3, 4), [4, inf)
  samples = [-1.0, 0.0, 1.5, 2.0, 5.0, 15]
  bounds = [0.0, 5.0]
  want = [2, 1, 1, 0, 2]
  with self.session():
    hist = histogram_ops.histogram_fixed_width(
        samples, bounds, nbins=5, dtype=dtypes.int64)
    self.assertEqual(dtypes.int64, hist.dtype)
    got = self.evaluate(hist)
    self.assertAllClose(want, got)
def test_1d_values_int64_output(self):
  """Checks that `dtype=dtypes.int64` yields int64 counts with correct values."""
  # Bins will be:
  #   (-inf, 1), [1, 2), [2, 3), [3, 4), [4, inf)
  histogram_kwargs = {"nbins": 5, "dtype": dtypes.int64}
  samples = [-1.0, 0.0, 1.5, 2.0, 5.0, 15]
  bounds = [0.0, 5.0]
  with self.session(use_gpu=True):
    hist = histogram_ops.histogram_fixed_width(
        samples, bounds, **histogram_kwargs)
    self.assertEqual(dtypes.int64, hist.dtype)
    self.assertAllClose([2, 1, 1, 0, 2], hist.eval())
def _make_auc_histograms(boolean_labels, scores, score_range, nbins):
  """Builds per-label score histograms for one batch.

  Args:
    boolean_labels: Mask selecting the scores of True-labelled records; its
      logical negation selects the False-labelled ones.
    scores: Scores to histogram.
    score_range: Value range forwarded to `histogram_fixed_width`.
    nbins: Bin count forwarded to `histogram_fixed_width`.

  Returns:
    A `(hist_true, hist_false)` pair of int64 histograms.
  """
  scope_values = [boolean_labels, scores, nbins]
  with variable_scope.variable_scope(
      None, 'make_auc_histograms', scope_values):

    def _masked_histogram(mask, name):
      """Histograms the scores selected by `mask` under op name `name`."""
      return histogram_ops.histogram_fixed_width(
          array_ops.boolean_mask(scores, mask),
          score_range,
          nbins=nbins,
          dtype=dtypes.int64,
          name=name)

    # Scores of records in this batch whose label is True.
    hist_true = _masked_histogram(boolean_labels, 'hist_true')
    # Scores of records in this batch whose label is False.
    hist_false = _masked_histogram(
        math_ops.logical_not(boolean_labels), 'hist_false')
    return hist_true, hist_false
def _make_auc_histograms(boolean_labels, scores, score_range, nbins):
  """Create histogram tensors from one batch of labels/scores.

  Args:
    boolean_labels: Boolean mask applied to `scores`; True entries feed
      `hist_true` and (via `logical_not`) False entries feed `hist_false`.
    scores: Scores to histogram.
    score_range: Value range forwarded to `histogram_fixed_width`.
    nbins: Number of bins forwarded to `histogram_fixed_width`.

  Returns:
    A `(hist_true, hist_false)` tuple of int64 histogram tensors.
  """
  # `variable_scope(None, default_name=..., values=...)` replaces the
  # deprecated `variable_op_scope(values, None, default_name)` form. Keywords
  # are used so the arguments cannot be bound to the wrong parameters.
  with variable_scope.variable_scope(
      None,
      default_name='make_auc_histograms',
      values=[boolean_labels, scores, nbins]):
    # Histogram of scores for records in this batch with True label.
    hist_true = histogram_ops.histogram_fixed_width(
        array_ops.boolean_mask(scores, boolean_labels),
        score_range,
        nbins=nbins,
        dtype=dtypes.int64,
        name='hist_true')
    # Histogram of scores for records in this batch with False label.
    hist_false = histogram_ops.histogram_fixed_width(
        array_ops.boolean_mask(scores, math_ops.logical_not(boolean_labels)),
        score_range,
        nbins=nbins,
        dtype=dtypes.int64,
        name='hist_false')
    return hist_true, hist_false
def test_one_update_on_constant_2d_input(self):
  """Evaluates the histogram of a constant 2-D input twice; counts must match."""
  # Bins will be:
  #   (-inf, 1), [1, 2), [2, 3), [3, 4), [4, inf)
  rows = [[-1.0, 0.0, 1.5], [2.0, 5.0, 15]]
  bounds = [0.0, 5.0]
  want = [2, 1, 1, 0, 2]
  with self.test_session():
    hist = histogram_ops.histogram_fixed_width(rows, bounds, nbins=5)
    # Hist should start "fresh" with every eval, so a second evaluation must
    # give the same counts as the first.
    for _ in range(2):
      self.assertAllClose(want, hist.eval())
def test_empty_input_gives_all_zero_counts(self):
  """Evaluates the histogram of an empty input twice; all counts must be 0."""
  # Bins will be:
  #   (-inf, 1), [1, 2), [2, 3), [3, 4), [4, inf)
  bounds = [0.0, 5.0]
  no_samples = []
  all_zero = [0, 0, 0, 0, 0]
  with self.test_session():
    hist = histogram_ops.histogram_fixed_width(no_samples, bounds, nbins=5)
    # Hist should start "fresh" with every eval, so a second evaluation must
    # give the same counts as the first.
    for _ in range(2):
      self.assertAllClose(all_zero, hist.eval())
def histogram(self, x, value_range=None, nbins=None, name=None):
  """Return histogram of values.

  Given the tensor `x`, this operation returns a rank 1 histogram counting
  the number of entries in `x` that fell into every bin. The bins are equal
  width and determined by the arguments `value_range` and `nbins`.

  Args:
    x: 1D numeric `Tensor` of items to count.
    value_range: Shape [2] `Tensor`. `x <= value_range[0]` will be mapped to
      `hist[0]`, `x >= value_range[1]` will be mapped to `hist[-1]`. Must be
      same dtype as `x`. Defaults to `[reduce_min(x), 1 + reduce_max(x)]`.
    nbins: Scalar `int32 Tensor`. Number of histogram bins. Defaults to
      `value_range[1] - value_range[0]` cast to int32.
    name: Python `str` name prefixed to Ops created by this class.

  Returns:
    counts: 1D `Tensor` of counts, as computed by
      `histogram_ops.histogram_fixed_width(x, value_range, nbins)`.
    edges: 1D `Tensor` built as `range(lo, hi, delta)` with
      `lo, hi = value_range` and `delta = (hi - lo) / nbins`.
  """
  with ops.name_scope(name, "histogram", [x]):
    x = ops.convert_to_tensor(x, name="x")
    if value_range is None:
      value_range = [
          math_ops.reduce_min(x), 1 + math_ops.reduce_max(x)
      ]
    value_range = ops.convert_to_tensor(value_range, name="value_range")
    lo = value_range[0]
    hi = value_range[1]
    if nbins is None:
      # `cast` replaces the deprecated `math_ops.to_int32`. The dtype is
      # given by name so no extra module is needed in scope, and the op name
      # matches the default that `to_int32` used.
      nbins = math_ops.cast(hi - lo, "int32", name="ToInt32")
    delta = (hi - lo) / math_ops.cast(
        nbins, dtype=value_range.dtype.base_dtype)
    edges = math_ops.range(
        start=lo, limit=hi, delta=delta, dtype=x.dtype.base_dtype)
    counts = histogram_ops.histogram_fixed_width(
        x, value_range=value_range, nbins=nbins)
    return counts, edges
def test_one_update_on_constant_2d_input(self):
  """Runs one update with a constant 2-D input and checks the stored counts."""
  # Bins will be:
  #   (-inf, 1), [1, 2), [2, 3), [3, 4), [4, inf)
  nbins = [5]
  bounds = [0.0, 5.0]
  rows = [[-1.0, 0.0, 1.5], [2.0, 5.0, 15]]
  want = [2, 1, 1, 0, 2]
  with self.test_session() as sess:
    hist = variables.Variable(array_ops.zeros(nbins, dtype=dtypes.int32))
    hist_update = histogram_ops.histogram_fixed_width(hist, rows, bounds)
    variables.initialize_all_variables().run()
    dtypes_agree = hist.dtype.is_compatible_with(hist_update.dtype)
    self.assertTrue(dtypes_agree)

    # The updating op returns the new histogram array.
    self.assertAllClose(want, sess.run(hist_update))

    # hist should contain updated values, but eval() should not change it,
    # so reading it twice must give the same counts.
    for _ in range(2):
      self.assertAllClose(want, hist.eval())
def test_two_updates_on_scalar_input(self):
  """Feeds two scalars in alternating order; counts must track the feed only."""
  # Bins will be:
  #   (-inf, 1), [1, 2), [2, 3), [3, 4), [4, inf)
  value_range = [0.0, 5.0]
  expected_counts_for = {
      1.5: [0, 1, 0, 0, 0],
      2.5: [0, 0, 1, 0, 0],
  }
  with self.test_session():
    values = array_ops.placeholder(dtypes.float32, shape=[])
    hist = histogram_ops.histogram_fixed_width(values, value_range, nbins=5)

    # The values in hist should depend on the current feed and nothing else,
    # so each scalar is fed more than once and out of order.
    for fed_value in (2.5, 1.5, 1.5, 2.5):
      self.assertAllClose(
          expected_counts_for[fed_value],
          hist.eval(feed_dict={values: fed_value}))
def histogram(self, x, value_range=None, nbins=None, name=None):
  """Counts the entries of `x` that fall into equal-width bins.

  Args:
    x: 1D numeric `Tensor` of items to count.
    value_range: Shape [2] `Tensor` giving the low and high ends of the
      binned interval. When `None`, `[reduce_min(x), 1 + reduce_max(x)]` is
      used. Must be same dtype as `x`.
    nbins: Scalar `int32 Tensor`, the number of bins. When `None`, the width
      of `value_range` cast to int32 is used.
    name: Python `str` name prefixed to Ops created by this class.

  Returns:
    counts: 1D `Tensor` produced by `histogram_ops.histogram_fixed_width`.
    edges: 1D `Tensor` produced by `range` from the low end towards the high
      end of `value_range` in steps of one bin width.
  """
  with ops.name_scope(name, "histogram", [x]):
    x = ops.convert_to_tensor(x, name="x")

    if value_range is None:
      smallest = math_ops.reduce_min(x)
      largest_plus_one = 1 + math_ops.reduce_max(x)
      value_range = [smallest, largest_plus_one]
    value_range = ops.convert_to_tensor(value_range, name="value_range")
    lo, hi = value_range[0], value_range[1]

    if nbins is None:
      nbins = math_ops.cast(hi - lo, dtypes.int32)

    # The span is divided by the bin count cast to the range's own dtype.
    span = hi - lo
    bin_width = span / math_ops.cast(
        nbins, dtype=value_range.dtype.base_dtype)

    edges = math_ops.range(
        start=lo, limit=hi, delta=bin_width, dtype=x.dtype.base_dtype)
    counts = histogram_ops.histogram_fixed_width(
        x, value_range=value_range, nbins=nbins)
    return counts, edges