Example No. 1
def multi_step_lr_schedule(
    global_step: tf_compat.Tensor,
    start_step: int,
    milestone_steps: List[int],
    init_lr: float,
    gamma: float,
    name: str = "multi_step_lr_schedule",
):
    """
    Create a multi step learning rate schedule in the current graph.
    Multiplies init_lr by gamma after each milestone has passed.
    Ex: lr = init_lr * (gamma ** NUM_UPDATES)

    :param global_step: the global step used for training
    :param start_step: the step to start the schedule on
    :param milestone_steps: a list of steps to decrease the learning rate at;
        these are the number of steps that must pass after start_step before the lr is decreased
    :param init_lr: the learning rate to start the schedule with
    :param gamma: the decay weight to decrease init_lr by after each milestone is passed
    :param name: the name scope to create the graph under
    :return: the calculated learning rate tensor
    """
    with tf_compat.name_scope(name):
        global_step = tf_compat.cast(global_step, tf_compat.int64)
        milestone_steps = tf_compat.constant(
            [mile + start_step for mile in milestone_steps],
            dtype=tf_compat.int64,
            name="milestone_steps",
        )
        start_step = tf_compat.constant(start_step,
                                        dtype=tf_compat.int64,
                                        name="start_step")
        init_lr = tf_compat.constant(init_lr,
                                     dtype=tf_compat.float32,
                                     name="init_lr")
        gamma = tf_compat.constant(gamma,
                                   dtype=tf_compat.float32,
                                   name="gamma")
        before = tf_compat.less(global_step, start_step, name="before")

        def _calc_lr():
            less = tf_compat.cast(
                tf_compat.greater_equal(global_step, milestone_steps),
                tf_compat.int64)
            updates = tf_compat.reduce_sum(less)
            mult_g = tf_compat.pow(gamma,
                                   tf_compat.cast(updates, tf_compat.float32))

            return tf_compat.multiply(init_lr, mult_g)

        learning_rate = tf_compat.cond(before,
                                       lambda: init_lr,
                                       _calc_lr,
                                       name="learning_rate")

    return learning_rate
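
A minimal usage sketch (not from the source): it assumes tf_compat resolves to tensorflow.compat.v1 and that multi_step_lr_schedule is importable as defined above; a placeholder stands in for the training global step.

import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

# hypothetical scalar global step; a real training loop would normally use
# a global step variable instead of a placeholder
global_step = tf.placeholder(tf.int64, shape=(), name="global_step")
lr = multi_step_lr_schedule(
    global_step=global_step,
    start_step=0,
    milestone_steps=[1000, 2000],
    init_lr=0.1,
    gamma=0.1,
)

with tf.Session() as sess:
    for step in (0, 1500, 2500):
        # lr decays by gamma after each milestone: ~0.1, ~0.01, ~0.001
        print(step, sess.run(lr, feed_dict={global_step: step}))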
Example No. 2
def imagenet_normalizer(img):
    """
    Normalize an image using the per-channel means and standard deviations of the ImageNet dataset

    :param img: The input image to normalize
    :return: The normalized image
    """
    img = tf_compat_div(img, 255.0)
    means = tf_compat.constant(IMAGENET_RGB_MEANS, dtype=tf_compat.float32)
    stds = tf_compat.constant(IMAGENET_RGB_STDS, dtype=tf_compat.float32)
    img = tf_compat_div(tf_compat.subtract(img, means), stds)

    return img
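
For reference, a plain NumPy sketch of the same normalization; the mean/std values below are the commonly used ImageNet statistics and are an assumption here, standing in for the library's IMAGENET_RGB_MEANS and IMAGENET_RGB_STDS constants.

import numpy

# assumed values for illustration only
IMAGENET_RGB_MEANS = [0.485, 0.456, 0.406]
IMAGENET_RGB_STDS = [0.229, 0.224, 0.225]

def imagenet_normalizer_np(img):
    # scale to [0, 1], then standardize each RGB channel
    img = img / 255.0
    return (img - numpy.array(IMAGENET_RGB_MEANS)) / numpy.array(IMAGENET_RGB_STDS)

normalized = imagenet_normalizer_np(numpy.full((224, 224, 3), 128.0))
print(normalized.shape)  # (224, 224, 3)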
Example No. 3
def symmetric_pad2d(x_tens: tf_compat.Tensor,
                    pad: Union[str, int, Tuple[int, int]], data_format: str):
    """
    Create a symmetric pad op in the current graph and scope.
    To do this, pad must be an integer or tuple of integers.
    If pad is a string, this function does nothing and pad should instead be
    passed into the pool or conv op.

    :param x_tens: the tensor to apply padding to
    :param pad: the padding to apply symmetrically. If it is a single integer,
        it will be applied to both sides of the height and width dimensions.
        If it is a tuple, the first element is used as the padding for
        both sides of the height dimension and the second for both sides of the width dimension.
    :param data_format: either channels_last or channels_first
    :return: the padded tensor
    """
    if isinstance(pad, str):
        # default tensorflow_v1 padding
        return x_tens

    y_pad = [pad, pad] if isinstance(pad, int) else [pad[0], pad[0]]
    x_pad = [pad, pad] if isinstance(pad, int) else [pad[1], pad[1]]
    pad_tensor = ([[0, 0], y_pad, x_pad, [0, 0]] if data_format
                  == "channels_last" else [[0, 0], [0, 0], y_pad, x_pad])
    pad_tensor = tf_compat.constant(pad_tensor)

    return tf_compat.pad(x_tens, pad_tensor)
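
A hypothetical usage sketch, assuming tf_compat resolves to tensorflow.compat.v1 and an NHWC ("channels_last") input:

import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

x = tf.placeholder(tf.float32, shape=[None, 32, 32, 3])  # NHWC
# pad 2 rows on each side of the height dim and 1 column on each side of the width dim
padded = symmetric_pad2d(x, pad=(2, 1), data_format="channels_last")
print(padded.shape)  # batch stays unknown; height 32 -> 36, width 32 -> 34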
Example No. 4
    def create_ops(
        self,
        steps_per_epoch: int,
        global_step: Optional[tf_compat.Variable],
        graph: Optional[tf_compat.Graph],
    ) -> Tuple[List[Union[tf_compat.Tensor, tf_compat.Operation]], Dict[str, Any]]:
        """
        Create ops to set the learning rate at a given value if the global step reaches
        a given value

        :param steps_per_epoch: the number of steps (batches) per training epoch
        :param global_step: the global step used while training
        :param graph: the graph to be modified
        :return: a tuple (empty list of ops,
            dict of learning rate and logging summaries)
        """
        mod_ops, mod_extras = super().create_ops(graph, steps_per_epoch, global_step)
        name_scope = "{}/{}".format(NM_RECAL, self.__class__.__name__)

        with graph.as_default():
            with tf_compat.name_scope(name_scope):
                learning_rate = tf_compat.constant(
                    self.learning_rate, tf_compat.float32, name="learning_rate"
                )

            _add_lr_extras(mod_extras, learning_rate, self.log_types)

        return mod_ops, mod_extras
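
Per the docstring, mod_extras carries the constant learning_rate tensor. A minimal hedged sketch of how such a tensor could be handed to a v1 optimizer, assuming tf_compat is tensorflow.compat.v1 (the 0.01 value here is arbitrary):

import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

learning_rate = tf.constant(0.01, tf.float32, name="learning_rate")
# a v1 optimizer accepts a tensor-valued learning rate directly
optimizer = tf.train.GradientDescentOptimizer(learning_rate)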
Example No. 5
def pruning_loss_sens_op_vars(
    graph: tf_compat.Graph = None,
    var_names: Union[List[str], Tuple[str]] = ("re:.*", ),
    mask_type: Union[str, List[int], PruningMaskCreator] = "unstructured",
) -> List[SparsePruningOpVars]:
    """
    Edit the graph to inject pruning ops and vars to allow for a ks loss
    sensitivity analysis.

    Note: this must be run outside of a session for it to take effect.

    :param graph: the graph to inject pruning ops and vars into,
        if not supplied uses get_default_graph()
    :param var_names: List of variable names or regex patterns of variables to get
        the op vars for.  Defaults to matching all variables
    :param mask_type: String to define the type of sparsity (options: ['unstructured',
        'channel', 'filter']), List to define the block shape of a parameter's in and out
        channels, or a PruningMaskCreator object. Default is 'unstructured'
    :return: the created pruning op vars to be used in approx_ks_loss_sensitivity and
        one_shot_ks_loss_sensitivity
    """

    if not graph:
        graph = tf_compat.get_default_graph()

    mask_creator = mask_type
    if not isinstance(mask_type, PruningMaskCreator):
        mask_creator = load_mask_creator(mask_type)

    ks_group = pruning_loss_sens_one_shot.__name__
    prunable_ops_and_inputs = get_ops_and_inputs_by_name_or_regex(
        var_names, graph)
    op_vars = []

    with graph.as_default():
        for prune_op, prune_op_input in prunable_ops_and_inputs:
            with tf_compat.name_scope(
                    PruningScope.model(prune_op, ks_group,
                                       trailing_slash=True)):
                sparsity = tf_compat.placeholder(dtype=tf_compat.float32,
                                                 name="sparsity_placeholder")
                update = tf_compat.constant(True, tf_compat.bool)
            prune_op_var = create_op_pruning(
                prune_op,
                prune_op_input,
                sparsity,
                update,
                True,
                None,
                ks_group,
                mask_creator,
            )
            op_vars.append(SparsePruningOpVars(prune_op_var, sparsity))

    return op_vars
Example No. 6
    def _no_op():
        with tf_compat.name_scope(
                PruningScope.model(
                    op,
                    ks_group,
                    additional=PruningScope.OPS_UPDATE,
                    trailing_slash=True,
                )):
            # return no op wrapped in group to match update type
            return tf_compat.group(
                tf_compat.constant(0.0,
                                   dtype=op_var_tens.dtype,
                                   name=PruningScope.OP_MASK_UPDATE_NO_OP))
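
A standalone sketch of the pattern above (assuming tf_compat is tensorflow.compat.v1): wrapping a constant in tf.group yields an Operation, so the "do nothing" branch has the same type as the real mask-update op when both are used in a conditional.

import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

# an Operation that produces no values, matching the type of a real update op
no_op_update = tf.group(tf.constant(0.0, name="mask_update_no_op"))
print(isinstance(no_op_update, tf.Operation))  # True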
Example No. 7
    def creator(self):
        """
        :return: a created dataset that gives the file_path and label for each
            image under self.root
        """
        labels_strs = [
            fold.split(os.path.sep)[-1]
            for fold in glob.glob(os.path.join(self.root, "*"))
        ]
        labels_strs.sort()
        labels_dict = {
            lab: numpy.identity(len(labels_strs))[index].tolist()
            for index, lab in enumerate(labels_strs)
        }
        files_labels = [
            (file, labels_dict[file.split(os.path.sep)[-2]])
            for file in glob.glob(os.path.join(self.root, "*", "*"))
        ]
        random.Random(42).shuffle(files_labels)
        files, labels = zip(*files_labels)
        files = tf_compat.constant(files)
        labels = tf_compat.constant(labels)

        return tf_compat.data.Dataset.from_tensor_slices((files, labels))
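
A small sketch of how the labels_dict comprehension above turns class folder names into one-hot label vectors (the folder names here are made up for illustration):

import numpy

labels_strs = ["cat", "dog", "fish"]  # e.g. sorted folder names under self.root
labels_dict = {
    lab: numpy.identity(len(labels_strs))[index].tolist()
    for index, lab in enumerate(labels_strs)
}
print(labels_dict["dog"])  # [0.0, 1.0, 0.0]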
Example No. 8
    def create_ops(
        self,
        steps_per_epoch: int,
        global_step: tf_compat.Variable,
        graph: tf_compat.Graph,
    ) -> Tuple[List[Union[tf_compat.Tensor, tf_compat.Operation]], Dict[str, Any]]:
        """
        Create a switch case computing the learning rate at a given global step,
        along with the extras created by the individual LR modifiers

        :param steps_per_epoch: the number of steps per training epoch
        :param global_step: the global step used while training
        :param graph: the graph to be modified
        :return: a tuple (list of empty ops, dict of named ops/tensors for learning
            rate and summaries as extras)
        """
        mod_ops, mod_extras = super().create_ops(graph, steps_per_epoch, global_step)
        name_scope = "{}/{}".format(NM_RECAL, self.__class__.__name__)

        with graph.as_default():
            with tf_compat.name_scope(name_scope):
                pred_fn_pairs = []
                global_step = tf_compat.cast(global_step, tf_compat.int64)

                for index, child in enumerate(self._lr_modifiers):
                    with tf_compat.name_scope(str(index)):
                        _, child_extras = child.create_ops(
                            steps_per_epoch, global_step, graph
                        )
                        child_lr = child_extras[EXTRAS_KEY_LEARNING_RATE]
                        child_start_step, _ = child.start_end_steps(
                            steps_per_epoch, after_optim=False
                        )
                        child_select = tf_compat.greater_equal(
                            global_step,
                            tf_compat.constant(child_start_step, tf_compat.int64),
                            name="active",
                        )
                        pred_fn_pairs.append((child_select, lambda lr=child_lr: lr))

                learning_rate = tf_compat.case(pred_fn_pairs)
                _add_lr_extras(mod_extras, learning_rate, self.log_types)

        return mod_ops, mod_extras
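
A standalone sketch of the tf.case mechanics used above, assuming tf_compat is tensorflow.compat.v1. tf.case returns the branch of the first predicate that evaluates to True, so in this sketch the pairs are ordered from the latest start step to the earliest:

import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

global_step = tf.placeholder(tf.int64, shape=())
pred_fn_pairs = [
    (tf.greater_equal(global_step, 1000), lambda: tf.constant(0.01)),
    (tf.greater_equal(global_step, 0), lambda: tf.constant(0.1)),
]
learning_rate = tf.case(pred_fn_pairs)

with tf.Session() as sess:
    print(sess.run(learning_rate, feed_dict={global_step: 500}))   # 0.1
    print(sess.run(learning_rate, feed_dict={global_step: 1500}))  # 0.01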
Example No. 9
def create_ks_schedule_ops(
    global_step: tf_compat.Variable,
    begin_step: int,
    end_step: int,
    update_step_freq: int,
    init_sparsity: float,
    final_sparsity: float,
    exponent: float,
    ks_group: str,
) -> Tuple[tf_compat.Tensor, tf_compat.Tensor]:
    """
    Create a gradual schedule for model pruning (kernel sparsity).
    Creates a sparsity tensor that goes from init_sparsity to final_sparsity
    starting at begin_step and ending at end_step.
    Uses the global_step to map those.
    Additionally creates an update_ready tensor that is True if an update
    to the sparsity tensor should be run, False otherwise.

    :param global_step: the global optimizer step for the training graph
    :param begin_step: the global step to begin pruning at
    :param end_step: the global step to end pruning at
    :param update_step_freq: the number of global steps between each weight update
    :param init_sparsity: the starting value for sparsity of a
        weight tensor to be enforced
    :param final_sparsity: the end value for sparsity for a weight tensor to be enforced
    :param exponent: the exponent to use for interpolating between
        init_sparsity and final_sparsity; higher values will lead to larger sparsity
        steps at the beginning vs the end, e.g. linear (1) vs cubic (3)
    :param ks_group: the group identifier the scope should be created under
    :return: a tuple containing the signal for update_ready and the target sparsity
    """

    # create the scheduling ops first and the sparsity ops
    with tf_compat.name_scope(
            PruningScope.general(ks_group,
                                 additional=PruningScope.OPS_SCHEDULE,
                                 trailing_slash=True)):
        sched_before = tf_compat.less(global_step, begin_step)
        sched_start = tf_compat.equal(global_step, begin_step)
        sched_end = tf_compat.equal(global_step, end_step)
        sched_active = tf_compat.logical_and(
            tf_compat.greater(global_step, begin_step),
            tf_compat.less(global_step, end_step),
        )
        sched_active_inclusive = tf_compat.logical_or(
            sched_active, tf_compat.logical_or(sched_start, sched_end))
        sched_update = tf_compat.cond(
            tf_compat.less_equal(update_step_freq, 0),
            lambda: tf_compat.constant(True),
            lambda: tf_compat.equal(
                tf_compat.mod(
                    (global_step - begin_step), update_step_freq), 0),
        )
        sched_update_ready = tf_compat.logical_or(
            tf_compat.logical_or(sched_start, sched_end), sched_update)

        percentage = tf_compat.minimum(
            1.0,
            tf_compat.maximum(
                0.0,
                tf_compat_div(
                    tf_compat.cast(global_step - begin_step,
                                   tf_compat.float32),
                    end_step - begin_step,
                ),
            ),
        )
        exp_percentage = 1 - tf_compat.pow(1 - percentage, exponent)
        calc_sparsity = (tf_compat.multiply(final_sparsity - init_sparsity,
                                            exp_percentage) + init_sparsity)

    # create the update ready tensor and sparsity tensor
    with tf_compat.name_scope(
            PruningScope.general(ks_group, trailing_slash=True)):
        update_ready = tf_compat.logical_and(
            sched_active_inclusive,
            sched_update_ready,
            name=PruningScope.OP_UPDATE_READY,
        )
        sparsity = tf_compat.case(
            [
                (sched_before, lambda: tf_compat.constant(0.0)),
                (sched_start, lambda: tf_compat.constant(init_sparsity)),
                (sched_active, lambda: calc_sparsity),
            ],
            default=lambda: tf_compat.constant(final_sparsity),
            name=PruningScope.OP_SPARSITY,
        )

    # add return state to collections
    tf_compat.add_to_collection(
        PruningScope.collection_name(ks_group, PruningScope.OP_UPDATE_READY),
        update_ready,
    )
    tf_compat.add_to_collection(
        PruningScope.collection_name(ks_group, PruningScope.OP_SPARSITY),
        sparsity)

    return update_ready, sparsity
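
The sparsity interpolation above can be sanity-checked in plain Python; this sketch (an illustration, not library code) reproduces only the polynomial ramp from init_sparsity to final_sparsity, not the update_ready logic:

init_sparsity, final_sparsity, exponent = 0.05, 0.85, 3.0
begin_step, end_step = 0, 100

def target_sparsity(global_step):
    # fraction of the pruning window that has elapsed, clamped to [0, 1]
    percentage = min(1.0, max(0.0, (global_step - begin_step) / (end_step - begin_step)))
    # cubic ramp: large sparsity steps early, small steps near the end
    exp_percentage = 1 - (1 - percentage) ** exponent
    return (final_sparsity - init_sparsity) * exp_percentage + init_sparsity

for step in (0, 25, 50, 100):
    print(step, target_sparsity(step))  # ~0.05, ~0.51, ~0.75, ~0.85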
Example No. 10
def step_lr_schedule(
    global_step: tf_compat.Tensor,
    start_step: int,
    end_step: int,
    step_size: int,
    init_lr: float,
    gamma: float,
    name: str = "exponential_lr_schedule",
) -> tf_compat.Tensor:
    """
    Create an exponential learning rate schedule in the current graph.
    Multiplies init_lr by gamma after each step_size interval has passed.
    Ex: lr = init_lr * (gamma ** NUM_UPDATES)

    :param global_step: the global step used for training
    :param start_step: the step to start the exponential schedule on
    :param end_step: the step to end the exponential schedule on,
        can be set to -1, in which case the LR will continue to be updated
    :param step_size: the number of steps between each gamma update to the init_lr
    :param init_lr: the learning rate to start the schedule with
    :param gamma: the decay weight to decrease init_lr by after every step_size interval
    :param name: the name scope to create the graph under
    :return: the calculated learning rate tensor
    """
    with tf_compat.name_scope(name):
        global_step = tf_compat.cast(global_step, tf_compat.int64)
        max_updates = tf_compat.constant(
            (end_step - start_step) // step_size if end_step > 0 else -1,
            dtype=tf_compat.int64,
            name="max_updates",
        )
        start_step = tf_compat.constant(start_step,
                                        dtype=tf_compat.int64,
                                        name="start_step")
        end_step = tf_compat.constant(end_step,
                                      dtype=tf_compat.int64,
                                      name="end_step")
        init_lr = tf_compat.constant(init_lr,
                                     dtype=tf_compat.float32,
                                     name="init_lr")
        step_size = tf_compat.constant(step_size,
                                       dtype=tf_compat.int64,
                                       name="step_size")
        gamma = tf_compat.constant(gamma,
                                   dtype=tf_compat.float32,
                                   name="gamma")
        before = tf_compat.less(global_step, start_step, name="before")
        after = tf_compat.logical_and(
            tf_compat.greater_equal(global_step, end_step, name="after"),
            tf_compat.not_equal(end_step,
                                tf_compat.constant(-1, tf_compat.int64)),
        )

        def _calc_lr():
            steps = tf_compat.subtract(global_step, start_step)
            updates = tf_compat.cond(
                after,
                lambda: max_updates,
                lambda: tf_compat.cast(
                    tf_compat.floor(tf_compat.divide(steps, step_size)),
                    tf_compat.int64,
                ),
            )
            mult_g = tf_compat.pow(gamma,
                                   tf_compat.cast(updates, tf_compat.float32))

            return tf_compat.multiply(init_lr, mult_g)

        learning_rate = tf_compat.cond(before,
                                       lambda: init_lr,
                                       _calc_lr,
                                       name="learning_rate")

    return learning_rate
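
The schedule math in _calc_lr can be spelled out in plain Python; this sketch (an illustration, not library code) mirrors the capped update count and the exponential decay:

init_lr, gamma, step_size = 0.1, 0.9, 100
start_step, end_step = 0, 1000
max_updates = (end_step - start_step) // step_size if end_step > 0 else -1

def step_lr(global_step):
    if global_step < start_step:
        return init_lr
    updates = (global_step - start_step) // step_size
    if end_step > 0:
        # once past end_step the update count stops growing
        updates = min(updates, max_updates)
    return init_lr * gamma ** updates

for step in (0, 250, 999, 5000):
    print(step, step_lr(step))  # 0.1, then decayed by gamma every 100 steps, capped after end_step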