Example #1
def _create_var(name: str, value_expr: TfExpression) -> TfExpression:
    """Internal helper for creating autosummary accumulators."""
    assert not _finalized
    name_id = name.replace("/", "_")
    v = tf.cast(value_expr, _dtype)

    if v.shape.is_fully_defined():
        size = np.prod(tfutil.shape_to_list(v.shape))
        size_expr = tf.constant(size, dtype=_dtype)
    else:
        size = None
        size_expr = tf.reduce_prod(tf.cast(tf.shape(v), _dtype))

    if size == 1:
        if v.shape.ndims != 0:
            v = tf.reshape(v, [])
        v = [size_expr, v, tf.square(v)]
    else:
        v = [size_expr, tf.reduce_sum(v), tf.reduce_sum(tf.square(v))]
    v = tf.cond(tf.is_finite(v[1]), lambda: tf.stack(v),
                lambda: tf.zeros(3, dtype=_dtype))

    with tfutil.absolute_name_scope("Autosummary/" +
                                    name_id), tf.control_dependencies(None):
        var = tf.Variable(tf.zeros(3, dtype=_dtype),
                          trainable=False)  # [sum(1), sum(x), sum(x**2)]
    update_op = tf.cond(tf.is_variable_initialized(var),
                        lambda: tf.assign_add(var, v),
                        lambda: tf.assign(var, v))

    if name in _vars:
        _vars[name].append(var)
    else:
        _vars[name] = [var]
    return update_op
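
The accumulator created above stores the three running moments [sum(1), sum(x), sum(x**2)]. A minimal NumPy sketch (not part of the library) of how mean and standard deviation are later recovered from them, mirroring the arithmetic in finalize_autosummaries() (Example #7):

import numpy as np

x = np.random.randn(1000)
moments = np.array([x.size, x.sum(), np.square(x).sum()])  # [sum(1), sum(x), sum(x**2)]

moments = moments / moments[0]                     # normalize by the element count
mean = moments[1]                                  # E[x]
std = np.sqrt(moments[2] - np.square(moments[1]))  # sqrt(E[x**2] - E[x]**2)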
Example #2
    def get_loss_scaling_var(self, device: str) -> Union[tf.Variable, None]:
        """Get or create variable representing log2 of the current dynamic loss scaling factor."""
        if not self.use_loss_scaling:
            return None

        if device not in self._dev_ls_var:
            with tfutil.absolute_name_scope(self.scope + "/LossScalingVars"), tf.control_dependencies(None):
                self._dev_ls_var[device] = tf.Variable(np.float32(self.loss_scaling_init), name="loss_scaling_var")

        return self._dev_ls_var[device]
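
The variable holds log2 of the current loss-scaling factor for one device. Below is a hedged sketch, assuming the usual dynamic loss-scaling pattern (the helper names are illustrative, not taken from the excerpt): the loss is multiplied by 2**ls_var before gradients are computed, and the result is divided by the same factor afterwards (see "Scale" in Example #8).

import numpy as np
import tensorflow as tf

def _exp2(x):
    """2**x as a TF expression."""
    return tf.exp(x * np.float32(np.log(2.0)))

def apply_loss_scaling_sketch(value, ls_var):
    return value * _exp2(ls_var)      # scale up before computing gradients

def undo_loss_scaling_sketch(value, ls_var):
    return value * _exp2(-ls_var)     # scale back down afterwards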
Example #3
    def setup_as_moving_average_of(self,
                                   src_net: "Network",
                                   beta: TfExpressionEx = 0.99,
                                   beta_nontrainable: TfExpressionEx = 0.0) -> tf.Operation:
        """Construct a TensorFlow op that updates the variables of this network
        to be slightly closer to those of the given network."""
        with tfutil.absolute_name_scope(self.scope + "/_MovingAvg"):
            ops = []
            for name, var in self.vars.items():
                if name in src_net.vars:
                    cur_beta = beta if name in self.trainables else beta_nontrainable
                    new_value = tfutil.lerp(src_net.vars[name], var, cur_beta)
                    ops.append(var.assign(new_value))
            return tf.group(*ops)
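
tfutil.lerp is presumed to be a standard linear interpolation, so each assign moves the destination variable a fraction (1 - beta) of the way toward the source network; with beta_nontrainable = 0.0 the non-trainable variables are simply copied. A plain-Python sketch of that arithmetic:

def lerp_sketch(a, b, t):
    """Presumed semantics of tfutil.lerp: linear interpolation from a to b."""
    return a + (b - a) * t

# new_value = lerp(src, dst, 0.99) = 0.01 * src + 0.99 * dst   (trainables)
# new_value = lerp(src, dst, 0.0)  = src                       (non-trainables)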
Example #4
def autosummary(name: str,
                value: TfExpressionEx,
                passthru: TfExpressionEx = None) -> TfExpressionEx:
    """Create a new autosummary.

    Args:
        name:     Name to use in TensorBoard
        value:    TensorFlow expression or python value to track
        passthru: Optionally return this TF node without modifications but attach an autosummary update side-effect to this node.

    Example use of the passthru mechanism:

    n = autosummary('l2loss', loss, passthru=n)

    This is a shorthand for the following code:

    with tf.control_dependencies([autosummary('l2loss', loss)]):
        n = tf.identity(n)
    """
    tfutil.assert_tf_initialized()
    name_id = name.replace("/", "_")

    if tfutil.is_tf_expression(value):
        with tf.name_scope("summary_" + name_id), tf.device(value.device):
            update_op = _create_var(name, value)
            with tf.control_dependencies([update_op]):
                return tf.identity(value if passthru is None else passthru)

    else:  # python scalar or numpy array
        if name not in _immediate:
            with tfutil.absolute_name_scope(
                    "Autosummary/" +
                    name_id), tf.device(None), tf.control_dependencies(None):
                update_value = tf.placeholder(_dtype)
                update_op = _create_var(name, update_value)
                _immediate[name] = update_op, update_value

        update_op, update_value = _immediate[name]
        tfutil.run(update_op, {update_value: value})
        return value if passthru is None else passthru
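
A hedged usage sketch of the two code paths above, assuming a TensorFlow session has already been initialized through tfutil (the loss expression and learning-rate value are placeholders for illustration): passing a TF expression returns a pass-through identity with the update attached as a control dependency, while passing a plain Python value runs the cached update op immediately.

import tensorflow as tf

loss = tf.reduce_mean(tf.square(tf.random_normal([64])))  # stand-in loss expression
loss = autosummary("Loss/train", loss)                     # TF path: identity(loss) + update op
autosummary("Progress/learning_rate", 0.001)               # Python path: immediate update via placeholder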
Example #5
    def run(
        self,
        *in_arrays: Tuple[Union[np.ndarray, None], ...],
        input_transform: dict = None,
        output_transform: dict = None,
        return_as_list: bool = False,
        print_progress: bool = False,
        minibatch_size: int = None,
        num_gpus: int = 1,
        assume_frozen: bool = False,
        **dynamic_kwargs
    ) -> Union[np.ndarray, Tuple[np.ndarray, ...], List[np.ndarray]]:
        """Run this network for the given NumPy array(s), and return the output(s) as NumPy array(s).

        Args:
            input_transform:    A dict specifying a custom transformation to be applied to the input tensor(s) before evaluating the network.
                                The dict must contain a 'func' field that points to a top-level function. The function is called with the input
                                TensorFlow expression(s) as positional arguments. Any remaining fields of the dict will be passed in as kwargs.
            output_transform:   A dict specifying a custom transformation to be applied to the output tensor(s) after evaluating the network.
                                The dict must contain a 'func' field that points to a top-level function. The function is called with the output
                                TensorFlow expression(s) as positional arguments. Any remaining fields of the dict will be passed in as kwargs.
            return_as_list:     True = return a list of NumPy arrays, False = return a single NumPy array, or a tuple if there are multiple outputs.
            print_progress:     Print progress to the console? Useful for very large input arrays.
            minibatch_size:     Maximum minibatch size to use, None = disable batching.
            num_gpus:           Number of GPUs to use.
            assume_frozen:      Improve multi-GPU performance by assuming that the trainable parameters will remain unchanged between calls.
            dynamic_kwargs:     Additional keyword arguments to be passed into the network build function.
        """
        assert len(in_arrays) == self.num_inputs
        assert not all(arr is None for arr in in_arrays)
        assert input_transform is None or util.is_top_level_function(
            input_transform["func"])
        assert output_transform is None or util.is_top_level_function(
            output_transform["func"])
        output_transform, dynamic_kwargs = _handle_legacy_output_transforms(
            output_transform, dynamic_kwargs)
        num_items = in_arrays[0].shape[0]
        if minibatch_size is None:
            minibatch_size = num_items

        # Construct unique hash key from all arguments that affect the TensorFlow graph.
        key = dict(input_transform=input_transform,
                   output_transform=output_transform,
                   num_gpus=num_gpus,
                   assume_frozen=assume_frozen,
                   dynamic_kwargs=dynamic_kwargs)

        def unwind_key(obj):
            if isinstance(obj, dict):
                return [(key, unwind_key(value))
                        for key, value in sorted(obj.items())]
            if callable(obj):
                return util.get_top_level_function_name(obj)
            return obj

        key = repr(unwind_key(key))

        # Build graph.
        if key not in self._run_cache:
            with tfutil.absolute_name_scope(
                    self.scope + "/_Run"), tf.control_dependencies(None):
                with tf.device("/cpu:0"):
                    in_expr = [
                        tf.placeholder(tf.float32, name=name)
                        for name in self.input_names
                    ]
                    in_split = list(
                        zip(*[tf.split(x, num_gpus) for x in in_expr]))

                out_split = []
                for gpu in range(num_gpus):
                    with tf.device("/gpu:%d" % gpu):
                        net_gpu = self.clone() if assume_frozen else self
                        in_gpu = in_split[gpu]

                        if input_transform is not None:
                            in_kwargs = dict(input_transform)
                            in_gpu = in_kwargs.pop("func")(*in_gpu,
                                                           **in_kwargs)
                            in_gpu = [in_gpu] if tfutil.is_tf_expression(
                                in_gpu) else list(in_gpu)

                        assert len(in_gpu) == self.num_inputs
                        out_gpu = net_gpu.get_output_for(*in_gpu,
                                                         return_as_list=True,
                                                         **dynamic_kwargs)

                        if output_transform is not None:
                            out_kwargs = dict(output_transform)
                            out_gpu = out_kwargs.pop("func")(*out_gpu,
                                                             **out_kwargs)
                            out_gpu = [out_gpu] if tfutil.is_tf_expression(
                                out_gpu) else list(out_gpu)

                        assert len(out_gpu) == self.num_outputs
                        out_split.append(out_gpu)

                with tf.device("/cpu:0"):
                    out_expr = [
                        tf.concat(outputs, axis=0)
                        for outputs in zip(*out_split)
                    ]
                    self._run_cache[key] = in_expr, out_expr

        # Run minibatches.
        in_expr, out_expr = self._run_cache[key]
        out_arrays = [
            np.empty([num_items] + tfutil.shape_to_list(expr.shape)[1:],
                     expr.dtype.name) for expr in out_expr
        ]

        for mb_begin in range(0, num_items, minibatch_size):
            if print_progress:
                print("\r%d / %d" % (mb_begin, num_items), end="")

            mb_end = min(mb_begin + minibatch_size, num_items)
            mb_num = mb_end - mb_begin
            mb_in = [
                src[mb_begin:mb_end]
                if src is not None else np.zeros([mb_num] + shape[1:])
                for src, shape in zip(in_arrays, self.input_shapes)
            ]
            mb_out = tf.get_default_session().run(out_expr,
                                                  dict(zip(in_expr, mb_in)))

            for dst, src in zip(out_arrays, mb_out):
                dst[mb_begin:mb_end] = src

        # Done.
        if print_progress:
            print("\r%d / %d" % (num_items, num_items))

        if not return_as_list:
            out_arrays = out_arrays[0] if len(out_arrays) == 1 else tuple(
                out_arrays)
        return out_arrays
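
A hedged usage sketch of the calling convention documented above; Gs (a generator-style Network with two inputs) and the input shapes are hypothetical, and clip_images only illustrates the top-level-function requirement for output_transform.

import numpy as np
import tensorflow as tf

def clip_images(x):  # must be a top-level function
    return tf.clip_by_value(x, -1.0, 1.0)

latents = np.random.randn(8, 512).astype(np.float32)   # hypothetical input shapes
labels = np.zeros([8, 0], dtype=np.float32)
images = Gs.run(latents, labels,
                output_transform=dict(func=clip_images),
                minibatch_size=4,
                num_gpus=1)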
Example #6
    def _init_graph(self) -> None:
        # Collect inputs.
        self.input_names = []

        for param in inspect.signature(self._build_func).parameters.values():
            if param.kind == param.POSITIONAL_OR_KEYWORD and param.default is param.empty:
                self.input_names.append(param.name)

        self.num_inputs = len(self.input_names)
        assert self.num_inputs >= 1

        # Choose name and scope.
        if self.name is None:
            self.name = self._build_func_name
        assert re.match("^[A-Za-z0-9_.\\-]*$", self.name)
        with tf.name_scope(None):
            self.scope = tf.get_default_graph().unique_name(self.name,
                                                            mark_as_used=True)

        # Finalize build func kwargs.
        build_kwargs = dict(self.static_kwargs)
        build_kwargs["is_template_graph"] = True
        build_kwargs["components"] = self.components

        # Build template graph.
        with tfutil.absolute_variable_scope(
                self.scope, reuse=tf.AUTO_REUSE), tfutil.absolute_name_scope(
                    self.scope):  # ignore surrounding scopes
            assert tf.get_variable_scope().name == self.scope
            assert tf.get_default_graph().get_name_scope() == self.scope
            with tf.control_dependencies(
                    None):  # ignore surrounding control dependencies
                self.input_templates = [
                    tf.placeholder(tf.float32, name=name)
                    for name in self.input_names
                ]
                out_expr = self._build_func(*self.input_templates,
                                            **build_kwargs)

        # Collect outputs.
        assert tfutil.is_tf_expression(out_expr) or isinstance(out_expr, tuple)
        self.output_templates = [
            out_expr
        ] if tfutil.is_tf_expression(out_expr) else list(out_expr)
        self.num_outputs = len(self.output_templates)
        assert self.num_outputs >= 1
        assert all(tfutil.is_tf_expression(t) for t in self.output_templates)

        # Perform sanity checks.
        if any(t.shape.ndims is None for t in self.input_templates):
            raise ValueError(
                "Network input shapes not defined. Please call x.set_shape() for each input."
            )
        if any(t.shape.ndims is None for t in self.output_templates):
            raise ValueError(
                "Network output shapes not defined. Please call x.set_shape() where applicable."
            )
        if any(not isinstance(comp, Network)
               for comp in self.components.values()):
            raise ValueError(
                "Components of a Network must be Networks themselves.")
        if len(self.components) != len(
                set(comp.name for comp in self.components.values())):
            raise ValueError("Components of a Network must have unique names.")

        # List inputs and outputs.
        self.input_shapes = [
            tfutil.shape_to_list(t.shape) for t in self.input_templates
        ]
        self.output_shapes = [
            tfutil.shape_to_list(t.shape) for t in self.output_templates
        ]
        self.input_shape = self.input_shapes[0]
        self.output_shape = self.output_shapes[0]
        self.output_names = [
            t.name.split("/")[-1].split(":")[0] for t in self.output_templates
        ]

        # List variables.
        self.own_vars = OrderedDict(
            (var.name[len(self.scope) + 1:].split(":")[0], var)
            for var in tf.global_variables(self.scope + "/"))
        self.vars = OrderedDict(self.own_vars)
        self.vars.update((comp.name + "/" + name, var)
                         for comp in self.components.values()
                         for name, var in comp.vars.items())
        self.trainables = OrderedDict(
            (name, var) for name, var in self.vars.items() if var.trainable)
        self.var_global_to_local = OrderedDict(
            (var.name.split(":")[0], name) for name, var in self.vars.items())
Example #7
def finalize_autosummaries() -> Union[tf.Summary, None]:
    """Create the necessary ops to include autosummaries in the TensorBoard report.
    Returns the custom_scalars layout summary, or None if already finalized.
    Note: This should be done only once per graph.
    """
    global _finalized
    tfutil.assert_tf_initialized()

    if _finalized:
        return None

    _finalized = True
    tfutil.init_uninitialized_vars(
        [var for vars_list in _vars.values() for var in vars_list])

    # Create summary ops.
    with tf.device(None), tf.control_dependencies(None):
        for name, vars_list in _vars.items():
            name_id = name.replace("/", "_")
            with tfutil.absolute_name_scope("Autosummary/" + name_id):
                moments = tf.add_n(vars_list)
                moments /= moments[0]
                with tf.control_dependencies([moments]):  # read before resetting
                    reset_ops = [
                        tf.assign(var, tf.zeros(3, dtype=_dtype))
                        for var in vars_list
                    ]
                    with tf.name_scope(None), tf.control_dependencies(
                            reset_ops):  # reset before reporting
                        mean = moments[1]
                        std = tf.sqrt(moments[2] - tf.square(moments[1]))
                        tf.summary.scalar(name, mean)
                        tf.summary.scalar(
                            "xCustomScalars/" + name + "/margin_lo",
                            mean - std)
                        tf.summary.scalar(
                            "xCustomScalars/" + name + "/margin_hi",
                            mean + std)

    # Group by category and chart name.
    cat_dict = OrderedDict()
    for series_name in sorted(_vars.keys()):
        p = series_name.split("/")
        cat = p[0] if len(p) >= 2 else ""
        chart = "/".join(p[1:-1]) if len(p) >= 3 else p[-1]
        if cat not in cat_dict:
            cat_dict[cat] = OrderedDict()
        if chart not in cat_dict[cat]:
            cat_dict[cat][chart] = []
        cat_dict[cat][chart].append(series_name)

    # Setup custom_scalar layout.
    categories = []
    for cat_name, chart_dict in cat_dict.items():
        charts = []
        for chart_name, series_names in chart_dict.items():
            series = []
            for series_name in series_names:
                series.append(
                    layout_pb2.MarginChartContent.Series(
                        value=series_name,
                        lower="xCustomScalars/" + series_name + "/margin_lo",
                        upper="xCustomScalars/" + series_name + "/margin_hi"))
            margin = layout_pb2.MarginChartContent(series=series)
            charts.append(layout_pb2.Chart(title=chart_name, margin=margin))
        categories.append(layout_pb2.Category(title=cat_name, chart=charts))
    layout = summary_lib.custom_scalar_pb(
        layout_pb2.Layout(category=categories))
    return layout
Example #8
    def apply_updates(self) -> tf.Operation:
        """Construct training op to update the registered variables based on their gradients."""
        tfutil.assert_tf_initialized()
        assert not self._updates_applied
        self._updates_applied = True
        devices = list(self._dev_grads.keys())
        total_grads = sum(len(grads) for grads in self._dev_grads.values())
        assert len(devices) >= 1 and total_grads >= 1
        ops = []

        with tfutil.absolute_name_scope(self.scope):
            # Cast gradients to FP32 and calculate partial sum within each device.
            dev_grads = OrderedDict()  # device => [(grad, var), ...]

            for dev_idx, dev in enumerate(devices):
                with tf.name_scope("ProcessGrads%d" % dev_idx), tf.device(dev):
                    sums = []

                    for gv in zip(*self._dev_grads[dev]):
                        assert all(v is gv[0][1] for g, v in gv)
                        g = [tf.cast(g, tf.float32) for g, v in gv]
                        g = g[0] if len(g) == 1 else tf.add_n(g)
                        sums.append((g, gv[0][1]))

                    dev_grads[dev] = sums

            # Sum gradients across devices.
            if len(devices) > 1:
                with tf.name_scope("SumAcrossGPUs"), tf.device(None):
                    for var_idx, grad_shape in enumerate(self._grad_shapes):
                        g = [dev_grads[dev][var_idx][0] for dev in devices]

                        if np.prod(grad_shape):  # nccl does not support zero-sized tensors
                            g = nccl_ops.all_sum(g)

                        for dev, gg in zip(devices, g):
                            dev_grads[dev][var_idx] = (
                                gg, dev_grads[dev][var_idx][1])

            # Apply updates separately on each device.
            for dev_idx, (dev, grads) in enumerate(dev_grads.items()):
                with tf.name_scope("ApplyGrads%d" % dev_idx), tf.device(dev):
                    # Scale gradients as needed.
                    if self.use_loss_scaling or total_grads > 1:
                        with tf.name_scope("Scale"):
                            coef = tf.constant(np.float32(1.0 / total_grads),
                                               name="coef")
                            coef = self.undo_loss_scaling(coef)
                            grads = [(g * coef, v) for g, v in grads]

                    # Check for overflows.
                    with tf.name_scope("CheckOverflow"):
                        grad_ok = tf.reduce_all(
                            tf.stack([
                                tf.reduce_all(tf.is_finite(g))
                                for g, v in grads
                            ]))

                    # Update weights and adjust loss scaling.
                    with tf.name_scope("UpdateWeights"):
                        # pylint: disable=cell-var-from-loop
                        opt = self._dev_opt[dev]
                        ls_var = self.get_loss_scaling_var(dev)

                        if not self.use_loss_scaling:
                            ops.append(
                                tf.cond(grad_ok,
                                        lambda: opt.apply_gradients(grads),
                                        tf.no_op))
                        else:
                            ops.append(
                                tf.cond(
                                    grad_ok,
                                    lambda: tf.group(tf.assign_add(ls_var, self.loss_scaling_inc),
                                                     opt.apply_gradients(grads)),
                                    lambda: tf.group(tf.assign_sub(ls_var, self.loss_scaling_dec))))

                    # Report statistics on the last device.
                    if dev == devices[-1]:
                        with tf.name_scope("Statistics"):
                            ops.append(
                                autosummary.autosummary(
                                    self.id + "/learning_rate",
                                    self.learning_rate))
                            ops.append(
                                autosummary.autosummary(
                                    self.id + "/overflow_frequency",
                                    tf.where(grad_ok, 0, 1)))

                            if self.use_loss_scaling:
                                ops.append(
                                    autosummary.autosummary(
                                        self.id + "/loss_scaling_log2",
                                        ls_var))

            # Initialize variables and group everything into a single op.
            self.reset_optimizer_state()
            tfutil.init_uninitialized_vars(list(self._dev_ls_var.values()))

            return tf.group(*ops, name="TrainingOp")