def jvp_of_rule_rule(axis_size, in_batched, primals, tangents):
  in_batched_ps, in_batched_ts = in_batched

  mutually_batched = tree_map(operator.and_, in_batched_ps, in_batched_ts)
  extra_batched_ps = tree_map(lambda pb, tb: 0 if pb and not tb else None,
                              in_batched_ps, in_batched_ts)
  extra_batched_ts = tree_map(lambda pb, tb: 0 if tb and not pb else None,
                              in_batched_ps, in_batched_ts)

  out_mutually_batched = lu.Store()
  flat_ps_ts, tree_ps_ts = tree_flatten((primals, tangents))
  flat_extra_batched_ps_ts, tree_ps_ts2 = tree_flatten(
      (extra_batched_ps, extra_batched_ts),
      is_leaf=lambda x: x is None)

  # TODO(frostig): assert these also equal:
  #   treedef_tuple((in_tree, in_tree))
  # once https://github.com/google/jax/issues/9066 is fixed
  assert tree_ps_ts == tree_ps_ts2
  del tree_ps_ts2

  def to_jvp(*primals):
    out, out_batched = call_rule(rule, axis_size, mutually_batched, primals)
    check_vmap_rule_trees(
        rule, out_tree, tree_structure(out), tree_structure(out_batched))
    out_mutually_batched.store(out_batched)
    return out

  def to_vmap_over_extra_batched_dims(primals, tangents):
    return jax.jvp(to_jvp, primals, tangents)

  to_vmap_over_extra_batched_dims_flat, out_tree2 = flatten_fun_nokwargs(
      lu.wrap_init(to_vmap_over_extra_batched_dims), tree_ps_ts)

  flat_out_ps_ts, flat_out_axes = vmap_unrestricted(
      to_vmap_over_extra_batched_dims_flat, *flat_ps_ts,
      in_axes=flat_extra_batched_ps_ts,
      axis_name=core.no_axis_name, axis_size=axis_size)

  n, ragged = divmod(len(flat_out_ps_ts), 2)
  assert not ragged
  flat_out_ps, flat_out_ts = flat_out_ps_ts[:n], flat_out_ps_ts[n:]
  flat_out_axes_p, flat_out_axes_t = flat_out_axes[:n], flat_out_axes[n:]
  flat_out_ps = map(maybe_bdim_at_front, flat_out_ps, flat_out_axes_p)
  flat_out_extra_batched_ps = [d is not not_mapped for d in flat_out_axes_p]
  flat_out_ts = map(maybe_bdim_at_front, flat_out_ts, flat_out_axes_t)
  flat_out_extra_batched_ts = [d is not not_mapped for d in flat_out_axes_t]

  out_ps, out_ts = tree_unflatten(
      out_tree2(), [*flat_out_ps, *flat_out_ts])
  out_extra_batched_ps, out_extra_batched_ts = tree_unflatten(
      out_tree2(), [*flat_out_extra_batched_ps, *flat_out_extra_batched_ts])

  out_batched_ps = tree_map(
      operator.or_, out_mutually_batched.val, out_extra_batched_ps)
  out_batched_ts = tree_map(
      operator.or_, out_mutually_batched.val, out_extra_batched_ts)

  return (out_ps, out_ts), (out_batched_ps, out_batched_ts)
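
# A minimal usage sketch (not part of the source above): jvp_of_rule_rule is
# the batching rule used when the JVP of a custom_vmap-decorated function is
# itself vmapped. The function and rule names below are illustrative;
# jax.custom_batching.custom_vmap and def_vmap are the public entry points
# this sketch assumes.
import jax
import jax.numpy as jnp
from jax.custom_batching import custom_vmap

@custom_vmap
def sketch_f(x):
  return jnp.sin(x)

@sketch_f.def_vmap
def sketch_f_vmap_rule(axis_size, in_batched, xs):
  x_batched, = in_batched
  # A vmap rule returns (outputs, out_batched), mirroring the
  # (out, out_batched) pairs threaded through jvp_of_rule_rule above.
  return jnp.sin(xs), x_batched

xs = jnp.arange(3.)
ts = jnp.ones_like(xs)
# vmap of a jvp of the custom_vmap function: differentiating first and then
# batching is the combination that routes through a JVP-of-rule rule.
outs = jax.vmap(lambda x, t: jax.jvp(sketch_f, (x,), (t,)))(xs, ts)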
def _xla_call_impl(fun: lu.WrappedFun, *args, device, backend, name,
                   donated_invars, inline):
  del inline  # Only used at tracing time
  compiled_fun = _xla_callable(fun, device, backend, name, donated_invars,
                               *unsafe_map(arg_spec, args))
  try:
    out = compiled_fun(*args)
  except FloatingPointError:
    assert config.jax_debug_nans or config.jax_debug_infs  # compiled_fun can only raise in this case
    print("Invalid value encountered in the output of a jit/pmap-ed function. "
          "Calling the de-optimized version.")
    # We want to run the wrapped function again (after _xla_callable already
    # ran it), but linear_util.WrappedFun instances are meant to be run only
    # once. In addition to re-executing the Python code, which is usually
    # undesirable but which config.jax_debug_nans is meant to opt into, we'll
    # be re-executing any linear_util.py-style side effects, i.e.
    # re-populating Stores created by any transformation_with_aux's applied
    # to fun. Since this is intentional here, to avoid "Store occupied"
    # errors we clone the WrappedFun with empty stores.
    stores = [lu.Store() for _ in fun.stores]
    clone = lu.WrappedFun(fun.f, fun.transforms, stores, fun.params)
    with core.new_sublevel():
      _ = clone.call_wrapped(*args)  # probably won't return
  return out
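
# A small sketch (not part of the source above) of why the except branch
# clones fun with fresh stores: a linear_util Store may be written exactly
# once, so re-running the original WrappedFun would hit "Store occupied".
# The import path below is an assumption; linear_util is a JAX-internal
# module (referenced above as lu) and its location varies by JAX version.
from jax._src import linear_util as internal_lu

store = internal_lu.Store()
store.store(42)      # first write succeeds
try:
  store.store(43)    # second write raises: the store is already occupied
except internal_lu.StoreException as err:
  print(err)         # "Store occupied"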
def _xla_call_impl(fun: lu.WrappedFun, *args, device, backend, name,
                   donated_invars, inline, keep_unused: bool):
  del inline  # Only used at tracing time
  arg_specs = unsafe_map(arg_spec, args)
  if fun.in_type is not None:
    arg_specs = [(None, *xs) for _, *xs in arg_specs]
  compiled_fun = _xla_callable(fun, device, backend, name, donated_invars,
                               keep_unused, *arg_specs)
  try:
    return compiled_fun(*args)
  except FloatingPointError:
    assert config.jax_debug_nans or config.jax_debug_infs  # compiled_fun can only raise in this case
    print("Invalid value encountered in the output of a jit-decorated function. "
          "Calling the de-optimized version.")
    # We want to run the wrapped function again (after _xla_callable already
    # ran it), but linear_util.WrappedFun instances are meant to be run only
    # once. In addition to re-executing the Python code, which is usually
    # undesirable but which config.jax_debug_nans is meant to opt into, we'll
    # be re-executing any linear_util.py-style side effects, i.e.
    # re-populating Stores created by any transformation_with_aux's applied
    # to fun. Since this is intentional here, to avoid "Store occupied"
    # errors we clone the WrappedFun with empty stores.
    stores = [lu.Store() for _ in fun.stores]
    clone = lu.WrappedFun(fun.f, fun.transforms, stores, fun.params,
                          fun.in_type)
    with core.new_sublevel():
      _ = clone.call_wrapped(*args)  # may raise, not return

    # If control reaches this line, we got a NaN on the output of
    # `compiled_fun` but not `clone.call_wrapped` on the same arguments.
    # Let's tell the user.
    fun_info = pe.fun_sourceinfo(fun.f)
    msg = ("An invalid value was encountered in the output of the "
           f"`jit`-decorated function {fun_info}. Because "
           "config.jax_debug_nans and/or config.jax_debug_infs is set, the "
           "de-optimized function (i.e., the function as if the `jit` "
           "decorator were removed) was called in an attempt to get a more "
           "precise error message. However, the de-optimized function did not "
           "produce invalid values during its execution. This behavior can "
           "result from `jit` optimizations causing the invalid value to be "
           "produced. It may also arise from having nan/inf constants as "
           "outputs, like `jax.jit(lambda ...: jax.numpy.nan)(...)`. "
           "\n\n"
           "It may be possible to avoid the invalid value by removing the "
           "`jit` decorator, at the cost of losing optimizations. "
           "\n\n"
           "If you see this error, consider opening a bug report at "
           "https://github.com/google/jax.")
    raise FloatingPointError(msg)
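
# A minimal sketch (not part of the source above) of how the except branch is
# reached: with jax_debug_nans enabled, an invalid value in a jitted output
# triggers the de-optimized re-run, which usually pinpoints the offending
# primitive; the long FloatingPointError above covers the case where the
# re-run does not reproduce the invalid value.
import jax
import jax.numpy as jnp

jax.config.update("jax_debug_nans", True)

@jax.jit
def g(x):
  return jnp.log(x)  # nan for negative inputs

try:
  g(-1.0)
except FloatingPointError as err:
  print(err)  # here the de-optimized re-run names the offending primitive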