def create_training_op(
    self, loss: tf_compat.Tensor, params: Dict[str, Any]
) -> tf_compat.Operation:
    """
    Create training op for optimization

    :param loss: the loss tensor
    :param params: the model function params
    :return: an Operation minimizing loss
    """
    global_step = tf_compat.train.get_or_create_global_step()

    # collect all optimizer classes exposed under tf.train by name
    optimizer_const = {}
    for opt_name in dir(tf_compat.train):
        opt_cls = getattr(tf_compat.train, opt_name)
        if inspect.isclass(opt_cls) and issubclass(opt_cls, tf_compat.train.Optimizer):
            optimizer_const[opt_name] = opt_cls

    optimizer_name = params.get("optimizer", "AdamOptimizer")
    if optimizer_name not in optimizer_const:
        raise ValueError("Unsupported optimizer: {}".format(optimizer_name))
    optimizer_params = params.get("optimizer_params", {})
    optimizer = optimizer_const[optimizer_name](**optimizer_params)

    with tf_compat.name_scope("train"):
        # tf.layers.batch_normalization is used to support previous versions
        # of TF, which requires us to explicitly model the dependency between
        # the moving average/variance updates and the training op
        update_ops = tf_compat.get_collection(tf_compat.GraphKeys.UPDATE_OPS)
        with tf_compat.control_dependencies(update_ops):
            training_op = optimizer.minimize(loss, global_step=global_step)

    return training_op
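# Usage sketch for create_training_op (a hypothetical caller; "model" stands in
# for whatever object defines the method above). Any optimizer class exposed
# under tf.train can be selected by name, with its constructor kwargs passed
# through "optimizer_params".
def _example_create_training_op(model, loss):
    params = {
        "optimizer": "MomentumOptimizer",
        "optimizer_params": {"learning_rate": 0.01, "momentum": 0.9},
    }

    return model.create_training_op(loss, params)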
def rand_crop(img: tf_compat.Tensor):
    with tf_compat.name_scope(name):
        orig_shape = tf_compat.shape(img)
        scale = tf_compat.random_uniform(
            shape=[1], minval=scale_range[0], maxval=scale_range[1]
        )[0]
        ratio = tf_compat.random_uniform(
            shape=[1], minval=ratio_range[0], maxval=ratio_range[1]
        )[0]
        height = tf_compat.minimum(
            tf_compat.cast(
                tf_compat.round(
                    tf_compat.cast(orig_shape[0], dtype=tf_compat.float32)
                    * scale
                    / ratio
                ),
                tf_compat.int32,
            ),
            orig_shape[0],
        )
        width = tf_compat.minimum(
            tf_compat.cast(
                tf_compat.round(
                    tf_compat.cast(orig_shape[1], dtype=tf_compat.float32) * scale
                ),
                tf_compat.int32,
            ),
            orig_shape[1],
        )
        img = tf_compat.random_crop(img, [height, width, orig_shape[2]])

        return img
def simple_matmul_net(init_weights):
    tf_compat.reset_default_graph()
    n_inputs = 28 * 28
    n_hidden1 = 300
    n_hidden2 = 100
    n_outputs = 10
    X = tf_compat.placeholder(tf_compat.float32, shape=(None, n_inputs), name="X")

    def neuron_layer(X, n_neurons, name, activation=None):
        with tf_compat.name_scope(name):
            n_inputs = int(X.get_shape()[1])
            stddev = 2 / np.sqrt(n_inputs)
            init = tf_compat.truncated_normal((n_inputs, n_neurons), stddev=stddev)
            W = tf_compat.Variable(init, name="kernel")
            b = tf_compat.Variable(tf_compat.zeros([n_neurons]), name="bias")
            Z = tf_compat.matmul(X, W) + b

            if activation is not None:
                return activation(Z)
            else:
                return Z

    with tf_compat.name_scope("dnn"):
        hidden1 = neuron_layer(
            X, n_hidden1, name="hidden1", activation=tf_compat.nn.relu
        )
        hidden2 = neuron_layer(
            hidden1, n_hidden2, name="hidden2", activation=tf_compat.nn.relu
        )
        neuron_layer(hidden2, n_outputs, name="outputs")

    return tf_compat.get_default_graph()
def create_ops(
    self,
    steps_per_epoch: int,
    global_step: Optional[tf_compat.Variable],
    graph: Optional[tf_compat.Graph],
) -> Tuple[List[Union[tf_compat.Tensor, tf_compat.Operation]], Dict[str, Any]]:
    """
    Create ops to set the learning rate at a given value if the global step reaches
    a given value

    :param steps_per_epoch: the number of steps (batches) per training epoch
    :param global_step: the global step used while training
    :param graph: the graph to be modified
    :return: a tuple (empty list of ops, dict of learning rate and logging summaries)
    """
    mod_ops, mod_extras = super().create_ops(graph, steps_per_epoch, global_step)
    name_scope = "{}/{}".format(NM_RECAL, self.__class__.__name__)

    with graph.as_default():
        with tf_compat.name_scope(name_scope):
            learning_rate = tf_compat.constant(
                self.learning_rate, tf_compat.float32, name="learning_rate"
            )
            _add_lr_extras(mod_extras, learning_rate, self.log_types)

    return mod_ops, mod_extras
def test_lifecycle(
    self,
    modifier_lambda: Callable[[], SetLearningRateModifier],
    graph_lambda: Callable[[], tf_compat.Graph],
    steps_per_epoch: int,
    optim_lambda,
):
    modifier = modifier_lambda()
    graph = graph_lambda()

    with graph.as_default():
        global_step = tf_compat.train.get_or_create_global_step()
        # Further set up for loss, optimizer and training op
        x_batch = graph.get_tensor_by_name("inp:0")
        y_pred = graph.get_tensor_by_name("out:0")
        n_inputs = x_batch.shape[1]
        n_outputs = y_pred.shape[1]
        y_lab = tf_compat.placeholder(
            tf_compat.float32, shape=(None, n_outputs), name="y"
        )

        mod_ops, mod_extras = modifier.create_ops(
            steps_per_epoch, global_step=global_step, graph=graph
        )
        assert len(mod_ops) == 0
        assert len(mod_extras) == 2
        assert EXTRAS_KEY_LEARNING_RATE in mod_extras
        assert EXTRAS_KEY_SUMMARIES in mod_extras
        learning_rate = mod_extras[EXTRAS_KEY_LEARNING_RATE]

        with tf_compat.name_scope("train"):
            optimizer = optim_lambda(learning_rate=learning_rate)
            loss = tf_compat.losses.mean_squared_error(y_lab, y_pred)
            training_op = optimizer.minimize(loss, global_step=global_step)

        np.random.seed(12)
        batch_size = 8
        batch_x = np.random.randn(batch_size, n_inputs)
        batch_lab = np.random.randn(batch_size, n_outputs)

        with tf_compat.Session(graph=graph) as sess:
            sess.run(tf_compat.global_variables_initializer())
            for epoch in range(int(max(modifier.start_epoch, modifier.end_epoch)) + 5):
                for step in range(steps_per_epoch):
                    gs = sess.run(global_step)
                    expected = modifier.learning_rate
                    optim_lr = sess.run(_get_lr(optimizer))
                    assert abs(optim_lr - expected) <= EPSILON, (
                        "Failed at epoch:{} step:{} global_step:{}".format(
                            epoch, step, gs
                        )
                    )
                    sess.run(
                        training_op,
                        feed_dict={x_batch: batch_x, y_lab: batch_lab},
                    )
def res(img: tf_compat.Tensor):
    with tf_compat.name_scope(name):
        try:
            img = tf_compat.image.resize(img, image_size)
        except Exception:
            # tf.image.resize is unavailable in older TF 1.x versions;
            # fall back to the deprecated resize_images
            img = tf_compat.image.resize_images(img, image_size)

        return img
def create_ops(
    self,
    steps_per_epoch: int,
    global_step: tf_compat.Variable,
    graph: tf_compat.Graph,
) -> Tuple[List[Union[tf_compat.Tensor, tf_compat.Operation]], Dict[str, Any]]:
    """
    Create switch case computing the learning rate at a given global step and
    extras created by individual LR modifiers

    :param steps_per_epoch: the number of steps per training epoch
    :param global_step: the global step used while training
    :param graph: the graph to be modified
    :return: a tuple (empty list of ops, dict of named ops/tensors for learning
        rate and summaries as extras)
    """
    mod_ops, mod_extras = super().create_ops(graph, steps_per_epoch, global_step)
    name_scope = "{}/{}".format(NM_RECAL, self.__class__.__name__)

    with graph.as_default():
        with tf_compat.name_scope(name_scope):
            pred_fn_pairs = []
            global_step = tf_compat.cast(global_step, tf_compat.int64)

            for index, child in enumerate(self._lr_modifiers):
                with tf_compat.name_scope(str(index)):
                    _, child_extras = child.create_ops(
                        steps_per_epoch, global_step, graph
                    )
                    child_lr = child_extras[EXTRAS_KEY_LEARNING_RATE]
                    child_start_step, _ = child.start_end_steps(
                        steps_per_epoch, after_optim=False
                    )
                    child_select = tf_compat.greater_equal(
                        global_step,
                        tf_compat.constant(child_start_step, tf_compat.int64),
                        name="active",
                    )
                    # bind child_lr as a default arg so each lambda captures
                    # its own child's learning rate tensor
                    pred_fn_pairs.append((child_select, lambda lr=child_lr: lr))

            learning_rate = tf_compat.case(pred_fn_pairs)
            _add_lr_extras(mod_extras, learning_rate, self.log_types)

    return mod_ops, mod_extras
def multi_step_lr_schedule(
    global_step: tf_compat.Tensor,
    start_step: int,
    milestone_steps: List[int],
    init_lr: float,
    gamma: float,
    name: str = "multi_step_lr_schedule",
):
    """
    Create a multi step learning rate schedule in the current graph.
    Multiplies init_lr by gamma after each milestone has passed.
    Ex: lr = init_lr * (gamma ** NUM_UPDATES)

    :param global_step: the global step used for training
    :param start_step: the step to start the schedule on
    :param milestone_steps: a list of steps to decrease the learning rate at;
        these are the numbers of steps that must pass after start_step to
        decrease the lr
    :param init_lr: the learning rate to start the schedule with
    :param gamma: the decay weight to multiply the lr by after each milestone
        has passed
    :param name: the name scope to create the graph under
    :return: the calculated learning rate tensor
    """
    with tf_compat.name_scope(name):
        global_step = tf_compat.cast(global_step, tf_compat.int64)
        milestone_steps = tf_compat.constant(
            [mile + start_step for mile in milestone_steps],
            dtype=tf_compat.int64,
            name="milestone_steps",
        )
        start_step = tf_compat.constant(
            start_step, dtype=tf_compat.int64, name="start_step"
        )
        init_lr = tf_compat.constant(init_lr, dtype=tf_compat.float32, name="init_lr")
        gamma = tf_compat.constant(gamma, dtype=tf_compat.float32, name="gamma")
        before = tf_compat.less(global_step, start_step, name="before")

        def _calc_lr():
            # number of milestones passed so far; decay by gamma once for each
            num_updates = tf_compat.reduce_sum(
                tf_compat.cast(
                    tf_compat.greater_equal(global_step, milestone_steps),
                    tf_compat.int64,
                )
            )
            mult_g = tf_compat.pow(
                gamma, tf_compat.cast(num_updates, tf_compat.float32)
            )

            return tf_compat.multiply(init_lr, mult_g)

        learning_rate = tf_compat.cond(
            before, lambda: init_lr, _calc_lr, name="learning_rate"
        )

    return learning_rate
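# Usage sketch for multi_step_lr_schedule (an illustrative helper, not part of
# the original module). With start_step=50 and milestones [100, 200], the decays
# land at global steps 150 and 250: the lr is 0.1 before step 150, 0.01 from
# step 150, and 0.001 from step 250.
def _example_multi_step_lr_schedule():
    global_step = tf_compat.train.get_or_create_global_step()
    learning_rate = multi_step_lr_schedule(
        global_step, start_step=50, milestone_steps=[100, 200], init_lr=0.1, gamma=0.1
    )

    with tf_compat.Session() as sess:
        sess.run(tf_compat.global_variables_initializer())
        for step in (0, 150, 250):
            sess.run(tf_compat.assign(global_step, step))
            print(step, sess.run(learning_rate))  # 0.1, 0.01, 0.001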
def pruning_loss_sens_op_vars(
    graph: tf_compat.Graph = None,
    var_names: Union[List[str], Tuple[str]] = ("re:.*",),
    mask_type: Union[str, List[int], PruningMaskCreator] = "unstructured",
) -> List[SparsePruningOpVars]:
    """
    Edit the graph to inject pruning ops and vars to allow for a ks loss
    sensitivity analysis.

    Note: this must be run outside of a session for it to take effect.

    :param graph: the graph to inject pruning ops and vars into,
        if not supplied uses get_default_graph()
    :param var_names: list of variable names or regex patterns of variables
        to get the op vars for. Defaults to matching all variables
    :param mask_type: string to define the type of sparsity (options:
        ['unstructured', 'channel', 'filter']), list to define the block shape
        of a parameter's in and out channels, or a PruningMaskCreator object.
        Default is 'unstructured'
    :return: the created pruning op vars to be used in
        approx_ks_loss_sensitivity and one_shot_ks_loss_sensitivity
    """
    if not graph:
        graph = tf_compat.get_default_graph()

    mask_creator = mask_type
    if not isinstance(mask_type, PruningMaskCreator):
        mask_creator = load_mask_creator(mask_type)

    ks_group = pruning_loss_sens_one_shot.__name__
    prunable_ops_and_inputs = get_ops_and_inputs_by_name_or_regex(var_names, graph)
    op_vars = []

    with graph.as_default():
        for prune_op, prune_op_input in prunable_ops_and_inputs:
            with tf_compat.name_scope(
                PruningScope.model(prune_op, ks_group, trailing_slash=True)
            ):
                sparsity = tf_compat.placeholder(
                    dtype=tf_compat.float32, name="sparsity_placeholder"
                )
                update = tf_compat.constant(True, tf_compat.bool)
                prune_op_var = create_op_pruning(
                    prune_op,
                    prune_op_input,
                    sparsity,
                    update,
                    True,
                    None,
                    ks_group,
                    mask_creator,
                )
                op_vars.append(SparsePruningOpVars(prune_op_var, sparsity))

    return op_vars
def create_op_pruning_no_update(
    op: tf_compat.Operation,
    op_input: tf_compat.Tensor,
    ks_group: str,
    leave_enabled: bool = True,
    is_after_end_step: tf_compat.Tensor = None,
) -> PruningOpVars:
    """
    Creates the necessary variables and operators to gradually apply sparsity to an
    operator's variable without returning a PruningOpVars.update value.

    :param op: the operation to prune to the given sparsity
    :param op_input: the parameter within the op to create a mask for
    :param ks_group: the group identifier the scope should be created under
    :param leave_enabled: True to continue masking the weights after end_epoch,
        False to stop masking
    :param is_after_end_step: should only be provided if leave_enabled is False;
        tensor that is true if the current global step is after end_epoch
    :return: a named tuple containing the assignment op, mask variable,
        threshold tensor, and masked tensor
    """
    if tf_contrib_err:
        raise tf_contrib_err

    op_sgv = graph_editor.sgv(op)

    # create the necessary variables first
    with tf_compat.variable_scope(
        PruningScope.model(op, ks_group), reuse=tf_compat.AUTO_REUSE
    ):
        mask = tf_compat.get_variable(
            PruningScope.VAR_MASK,
            op_input.get_shape(),
            initializer=tf_compat.ones_initializer(),
            trainable=False,
            dtype=op_input.dtype,
        )
    tf_compat.add_to_collection(
        PruningScope.collection_name(ks_group, PruningScope.VAR_MASK), mask
    )

    # create the masked operation and assign it as the new input to the op
    with tf_compat.name_scope(PruningScope.model(op, ks_group, trailing_slash=True)):
        masked = tf_compat.multiply(mask, op_input, PruningScope.OP_MASKED_VAR)
        op_inp_tens = (
            masked
            if leave_enabled
            else tf_compat.cond(is_after_end_step, lambda: op_input, lambda: masked)
        )
        op_swapped_inputs = [
            inp if inp != op_input else op_inp_tens for inp in op_sgv.inputs
        ]
        graph_editor.swap_inputs(op, op_swapped_inputs)
    tf_compat.add_to_collection(
        PruningScope.collection_name(ks_group, PruningScope.OP_MASKED_VAR), masked
    )

    return PruningOpVars(op, op_input, None, mask, masked)
def mlp_net():
    inp = tf_compat.placeholder(tf_compat.float32, [None, 16], name="inp")

    with tf_compat.name_scope("mlp_net"):
        fc1 = _fc("fc1", inp, 16, 32)
        fc2 = _fc("fc2", fc1, 32, 64)
        fc3 = _fc("fc3", fc2, 64, 64, add_relu=False)
        out = tf_compat.sigmoid(fc3, name="out")

    return out, inp
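# Usage sketch (an illustrative helper, not part of the original module): build
# the toy MLP and run a single forward pass; "out" is the sigmoid of fc3, so the
# result has shape (batch, 64).
def _example_mlp_net_forward():
    out, inp = mlp_net()

    with tf_compat.Session() as sess:
        sess.run(tf_compat.global_variables_initializer())
        preds = sess.run(out, feed_dict={inp: np.random.randn(4, 16)})
        print(preds.shape)  # (4, 64)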
def conv_net():
    inp = tf_compat.placeholder(tf_compat.float32, [None, 28, 28, 1], name="inp")

    with tf_compat.name_scope("conv_net"):
        conv1 = _conv("conv1", inp, 1, 32, 3, 2, "SAME")
        conv2 = _conv("conv2", conv1, 32, 32, 3, 2, "SAME")
        avg_pool = tf_compat.reduce_mean(conv2, axis=[1, 2])
        reshape = tf_compat.reshape(avg_pool, [-1, 32])
        mlp = _fc("mlp", reshape, 32, 10, add_relu=False)
        out = tf_compat.sigmoid(mlp, name="out")

    return out, inp
def processor(self, file_path: tf_compat.Tensor, label: tf_compat.Tensor):
    """
    :param file_path: the path to the file to load an image from
    :param label: the label for the given image
    :return: a tuple containing the processed image and label
    """
    with tf_compat.name_scope("img_to_tensor"):
        img = tf_compat.read_file(file_path)
        # Decode and reshape the image to a 3 dimensional tensor.
        # Note: "expand_animations" is not available for TF 1.13 and prior,
        # hence the reshape trick below
        img = tf_compat.image.decode_image(img)
        img_shape = tf_compat.shape(img)
        img = tf_compat.reshape(img, [img_shape[0], img_shape[1], img_shape[2]])
        img = tf_compat.cast(img, dtype=tf_compat.float32)

    if self.pre_resize_transforms:
        transforms = (
            self.pre_resize_transforms.train
            if self.train
            else self.pre_resize_transforms.val
        )
        if transforms:
            with tf_compat.name_scope("pre_resize_transforms"):
                for trans in transforms:
                    img = trans(img)

    if self._image_size:
        res_callable = resize((self.image_size, self.image_size))
        img = res_callable(img)

    if self.post_resize_transforms:
        transforms = (
            self.post_resize_transforms.train
            if self.train
            else self.post_resize_transforms.val
        )
        if transforms:
            with tf_compat.name_scope("post_resize_transforms"):
                for trans in transforms:
                    img = trans(img)

    return img, label
def create_ops(
    self,
    steps_per_epoch: int,
    global_step: Optional[tf_compat.Variable],
    graph: Optional[tf_compat.Graph],
) -> Tuple[List[Union[tf_compat.Tensor, tf_compat.Operation]], Dict[str, Any]]:
    """
    Create ops to update the learning rate at the current global step

    :param steps_per_epoch: the number of steps (batches) per training epoch
    :param global_step: the global step used while training
    :param graph: the graph to be modified
    :return: a tuple (empty list of ops, dict of learning rate and summaries)
    """
    mod_ops, mod_extras = super().create_ops(graph, steps_per_epoch, global_step)
    name_scope = "{}/{}".format(NM_RECAL, self.__class__.__name__)

    with graph.as_default():
        with tf_compat.name_scope(name_scope):
            lr_class, lr_kwargs = self.corrected_lr_info(
                steps_per_epoch, self.start_epoch, self.end_epoch
            )
            start_step, end_step = self.start_end_steps(
                steps_per_epoch, after_optim=False
            )

            if lr_class == "StepLR":
                learning_rate = step_lr_schedule(
                    global_step,
                    start_step,
                    end_step,
                    lr_kwargs["step_size"],
                    self.init_lr,
                    lr_kwargs["gamma"],
                )
            elif lr_class == "MultiStepLR":
                learning_rate = multi_step_lr_schedule(
                    global_step,
                    start_step,
                    lr_kwargs["milestones"],
                    self.init_lr,
                    lr_kwargs["gamma"],
                )
            else:
                raise ValueError(
                    "unrecognized lr_class given of {}".format(lr_class)
                )

            _add_lr_extras(mod_extras, learning_rate, self.log_types)

    return mod_ops, mod_extras
def _fc(name, x_tens, in_chan, out_chan, add_relu=True):
    with tf_compat.name_scope(name):
        weight = tf_compat.Variable(
            tf_compat.random_normal([in_chan, out_chan]), name="weight"
        )
        bias = tf_compat.Variable(tf_compat.random_normal([out_chan]), name="bias")
        x_tens = tf_compat.matmul(x_tens, weight, name="matmul")
        x_tens = tf_compat.add(x_tens, bias, name="add")

        if add_relu:
            x_tens = tf_compat.nn.relu(x_tens, name="relu")

    return x_tens
def preprocess_for_eval(image: tf_compat.Tensor):
    """
    The default preprocessing function for the test set as defined in the
    Resnet paper for Cifar datasets

    :param image: the image tensor
    :return: the preprocessed image
    """
    with tf_compat.name_scope("test_preprocess"):
        image = tf_compat.cast(image, dtype=tf_compat.float32)
        image = tf_compat_div(image, 255.0)
        image = tf_compat.image.random_crop(image, [32, 32, 3])

        return image
def cent_crop(img: tf_compat.Tensor):
    with tf_compat.name_scope(name):
        orig_shape = tf_compat.shape(img)
        min_size = tf_compat.cond(
            tf_compat.greater_equal(orig_shape[0], orig_shape[1]),
            lambda: orig_shape[1],
            lambda: orig_shape[0],
        )

        if padding > 0:
            # grow the image by the padding on each side before cropping;
            # resize returns a callable that must be applied to the image
            orig_shape_list = img.get_shape().as_list()
            img = resize(
                (orig_shape_list[0] + 2 * padding, orig_shape_list[1] + 2 * padding)
            )(img)

        padding_height = tf_compat.add(
            tf_compat.cast(
                tf_compat.round(
                    tf_compat.div(
                        tf_compat.cast(
                            tf_compat.subtract(orig_shape[0], min_size),
                            tf_compat.float32,
                        ),
                        2.0,
                    )
                ),
                tf_compat.int32,
            ),
            padding,
        )
        padding_width = tf_compat.add(
            tf_compat.cast(
                tf_compat.round(
                    tf_compat.div(
                        tf_compat.cast(
                            tf_compat.subtract(orig_shape[1], min_size),
                            tf_compat.float32,
                        ),
                        2.0,
                    )
                ),
                tf_compat.int32,
            ),
            padding,
        )
        img = tf_compat.image.crop_to_bounding_box(
            img, padding_height, padding_width, min_size, min_size
        )

        return img
def create_metrics(
    self,
    net_outputs: Union[tf_compat.Tensor, Dict[str, tf_compat.Tensor]],
    labels: Union[tf_compat.Tensor, Dict[str, tf_compat.Tensor]],
    params: Dict[str, Any],
) -> Tuple[
    Dict[str, Tuple[tf_compat.Tensor, tf_compat.Operation]],
    Dict[str, tf_compat.Operation],
]:
    """
    Create metrics for evaluation

    :param net_outputs: output tensors of the model graph
    :param labels: ground truth labels
    :param params: the model function params
    :return: dictionary of metrics and their reset operations
    """
    metrics = params.get("metrics", [])
    metrics_dict = {}
    metrics_initializers_dict = {}

    with tf_compat.name_scope("metrics"):
        for metric in metrics:
            if metric == "accuracy":
                labels_argmax = tf_compat.argmax(labels, 1)
                net_outputs_argmax = tf_compat.argmax(net_outputs, 1)
                metrics_dict["accuracy"] = tf_compat.metrics.accuracy(
                    labels_argmax, net_outputs_argmax, name="accuracy_metric"
                )
                # the total and count variables created to support accuracy
                running_vars = tf_compat.get_collection(
                    tf_compat.GraphKeys.LOCAL_VARIABLES,
                    scope="metrics/accuracy_metric",
                )
                running_vars_initializer = tf_compat.variables_initializer(
                    var_list=running_vars
                )
                metrics_initializers_dict[metric] = running_vars_initializer
            else:
                raise ValueError("Unsupported metric: {}".format(metric))

    return metrics_dict, metrics_initializers_dict
def apply_op_vars_masks(
    pruning_op_vars: List[PruningOpVars], ks_group: str, sess: tf_compat.Session
):
    """
    Apply the masks to the original op's input var so that it can be saved
    with the desired sparsity for later.

    :param pruning_op_vars: the list of named tuples containing the sparse mask
        and the op variable to apply the sparse mask to
    :param ks_group: the group to create the assign ops under
    :param sess: the session to use to run the assign
    """
    for op_vars in pruning_op_vars:
        with tf_compat.name_scope(
            PruningScope.model(op_vars.op, ks_group, PruningScope.OP_SAVE)
        ):
            masked_var = tf_compat.multiply(op_vars.op_input, op_vars.mask)
            input_var = get_tensor_var(op_vars.op_input)
            assign = tf_compat.assign(input_var, masked_var)
            sess.run(assign)
def _conv(name, x_tens, in_chan, out_chan, kernel, stride, padding, add_relu=True):
    """
    https://www.tensorflow.org/versions/r1.15/api_docs/python/tf/nn/conv2d
    """
    with tf_compat.name_scope(name):
        weight = tf_compat.Variable(
            tf_compat.random_normal([kernel, kernel, in_chan, out_chan]), name="weight"
        )
        bias = tf_compat.Variable(tf_compat.random_normal([out_chan]), name="bias")
        x_tens = tf_compat.nn.conv2d(
            x_tens, weight, strides=[1, stride, stride, 1], padding=padding, name="conv"
        )
        x_tens = tf_compat.nn.bias_add(x_tens, bias, name="add")

        if add_relu:
            x_tens = tf_compat.nn.relu(x_tens, name="relu")

    return x_tens
def build(
    self,
    batch_size: int,
    repeat_count: int = None,
    shuffle_buffer_size: int = None,
    prefetch_buffer_size: int = None,
    num_parallel_calls: int = None,
) -> tf_compat.data.Dataset:
    """
    Create the dataset in the current graph using tf.data APIs

    :param batch_size: the batch size to create the dataset for
    :param repeat_count: the number of times to repeat the dataset,
        if unset or None, will repeat indefinitely
    :param shuffle_buffer_size: None if not shuffling,
        otherwise the size of the buffer to use for shuffling data
    :param prefetch_buffer_size: None if not prefetching,
        otherwise the size of the buffer to use for buffering
    :param num_parallel_calls: the number of parallel calls to run the
        processor function with
    :return: a tf.data.Dataset instance
    """
    with tf_compat.name_scope(self.name_scope()):
        dataset = self.creator()

        if shuffle_buffer_size and shuffle_buffer_size > 0:
            dataset = dataset.shuffle(
                shuffle_buffer_size, reshuffle_each_iteration=True
            )

        dataset = dataset.map(self.processor, num_parallel_calls=num_parallel_calls)

        # Together with shuffling above, putting batch after repeat yields
        # batches that straddle epoch boundaries
        dataset = dataset.repeat(repeat_count)
        dataset = dataset.batch(batch_size)

        if prefetch_buffer_size and prefetch_buffer_size > 0:
            dataset = dataset.prefetch(prefetch_buffer_size)

    return dataset
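# Usage sketch for build (hypothetical; "dataset_impl" is an instance of a
# subclass providing creator() and processor()). A typical training pipeline
# shuffles, repeats indefinitely, batches, and prefetches; in TF 1.x graph mode
# the (deprecated) one-shot iterator then pulls batch tensors.
def _example_build_dataset(dataset_impl, batch_size=32):
    tf_dataset = dataset_impl.build(
        batch_size,
        repeat_count=None,  # None repeats indefinitely
        shuffle_buffer_size=1024,
        prefetch_buffer_size=2,
        num_parallel_calls=4,
    )

    return tf_dataset.make_one_shot_iterator().get_next()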
def create_loss(
    self,
    net_outputs: Union[tf_compat.Tensor, Dict[str, tf_compat.Tensor]],
    labels: Union[tf_compat.Tensor, Dict[str, tf_compat.Tensor]],
    params: Dict[str, Any],
) -> tf_compat.Tensor:
    """
    Create loss function

    :param net_outputs: output tensors of the model graph
    :param labels: ground truth labels
    :param params: the model function params
    :return: a loss tensor
    """
    loss = params.get("loss")

    with tf_compat.name_scope("loss"):
        if loss == "cross_entropy":
            xentropy = tf_compat.nn.softmax_cross_entropy_with_logits_v2(
                labels=labels, logits=net_outputs
            )
            loss_tens = tf_compat.reduce_mean(xentropy, name="loss")
        else:
            raise ValueError("Unsupported loss function: {}".format(loss))

    return loss_tens
def preprocess_for_train(image: tf_compat.Tensor):
    """
    The default preprocessing function for the train set as defined in the
    Resnet paper for Cifar datasets

    :param image: the image tensor
    :return: the preprocessed image
    """
    with tf_compat.name_scope("train_preprocess"):
        image = tf_compat.cast(image, dtype=tf_compat.float32)
        rand_choice = tf_compat.random_uniform(
            shape=[], minval=0, maxval=2, dtype=tf_compat.int32
        )
        padding = _PADDING
        image = tf_compat.cond(
            tf_compat.equal(rand_choice, 0),
            lambda: tf_compat.pad(
                image, [[padding, padding], [padding, padding], [0, 0]]
            ),
            lambda: tf_compat.image.random_flip_left_right(image),
        )
        distorted_image = tf_compat.image.random_crop(image, [32, 32, 3])

        return distorted_image
def create_op_pruning(
    op: tf_compat.Operation,
    op_input: tf_compat.Tensor,
    sparsity: tf_compat.Tensor,
    update_ready: tf_compat.Tensor,
    leave_enabled: bool,
    is_after_end_step: tf_compat.Tensor,
    ks_group: str,
    mask_creator: PruningMaskCreator,
) -> PruningOpVars:
    """
    Creates the necessary variables and operators to gradually apply sparsity to an
    operator's variable.

    Handles setting a mask on an operator to the given sparsity.
    Sets the mask based on pruning away the lowest absolute magnitude weights.

    :param op: the operation to prune to the given sparsity
    :param op_input: the variable of the parameter within op to prune
    :param sparsity: the target sparsity to use for assigning the masks
    :param update_ready: the tensor where if true will update the mask from sparsity,
        if false will not update the mask
    :param leave_enabled: True to continue masking the weights after end_epoch,
        False to stop masking
    :param is_after_end_step: tensor that is true if the current global step
        is after end_epoch
    :param ks_group: the group identifier the scope should be created under
    :param mask_creator: object to define sparsity mask creation
    :return: a named tuple containing the assignment op, mask variable,
        threshold tensor, and masked tensor
    """
    initial_vars = create_op_pruning_no_update(
        op, op_input, ks_group, leave_enabled, is_after_end_step
    )
    op = initial_vars.op
    op_var_tens = initial_vars.op_input
    mask = initial_vars.mask
    masked = initial_vars.masked

    def _update():
        # create the update ops using the target sparsity tensor
        with tf_compat.name_scope(
            PruningScope.model(
                op,
                ks_group,
                additional=PruningScope.OPS_UPDATE,
                trailing_slash=True,
            )
        ):
            new_mask = mask_creator.create_sparsity_mask(op_var_tens, sparsity)
            weight_var = get_tensor_var(op_var_tens)

            return tf_compat.group(
                tf_compat.assign(mask, new_mask, name=PruningScope.OP_MASK_ASSIGN),
                tf_compat.assign(
                    weight_var,
                    tf_compat.multiply(new_mask, op_var_tens),
                    name=PruningScope.OP_WEIGHT_UPDATE,
                ),
            )

    def _no_update():
        with tf_compat.name_scope(
            PruningScope.model(
                op,
                ks_group,
                additional=PruningScope.OPS_UPDATE,
                trailing_slash=True,
            )
        ):
            # return no op wrapped in group to match update type
            return tf_compat.group(
                tf_compat.constant(
                    0.0,
                    dtype=op_var_tens.dtype,
                    name=PruningScope.OP_MASK_UPDATE_NO_OP,
                )
            )

    with tf_compat.name_scope(
        PruningScope.model(
            op,
            ks_group,
            additional=PruningScope.OPS_UPDATE,
            trailing_slash=True,
        )
    ):
        mask_update = tf_compat.cond(
            update_ready, _update, _no_update, name=PruningScope.OP_MASK_UPDATE
        )

    # add return state to collections
    tf_compat.add_to_collection(
        PruningScope.collection_name(ks_group, PruningScope.OP_MASK_UPDATE),
        mask_update,
    )

    return PruningOpVars(op, op_var_tens, mask_update, mask, masked)
def create_ks_schedule_ops(
    global_step: tf_compat.Variable,
    begin_step: int,
    end_step: int,
    update_step_freq: int,
    init_sparsity: float,
    final_sparsity: float,
    exponent: float,
    ks_group: str,
) -> Tuple[tf_compat.Tensor, tf_compat.Tensor]:
    """
    Create a gradual schedule for model pruning (kernel sparsity).
    Creates a sparsity tensor that goes from init_sparsity to final_sparsity
    starting at begin_step and ending at end_step.
    Uses the global_step to map those.
    Additionally creates an update_ready tensor that is True if an update
    to the sparsity tensor should be run, False otherwise.

    :param global_step: the global optimizer step for the training graph
    :param begin_step: the global step to begin pruning at
    :param end_step: the global step to end pruning at
    :param update_step_freq: the number of global steps between each weight update
    :param init_sparsity: the starting value for the sparsity of a weight tensor
        to be enforced
    :param final_sparsity: the end value for the sparsity of a weight tensor
        to be enforced
    :param exponent: the exponent to use for interpolating between init_sparsity
        and final_sparsity; higher values lead to larger sparsity steps at the
        beginning vs the end, ie linear (1) vs cubic (3)
    :param ks_group: the group identifier the scope should be created under
    :return: a tuple containing the signal for update_ready and the target sparsity
    """
    # create the scheduling ops first and the sparsity ops
    with tf_compat.name_scope(
        PruningScope.general(
            ks_group, additional=PruningScope.OPS_SCHEDULE, trailing_slash=True
        )
    ):
        sched_before = tf_compat.less(global_step, begin_step)
        sched_start = tf_compat.equal(global_step, begin_step)
        sched_end = tf_compat.equal(global_step, end_step)
        sched_active = tf_compat.logical_and(
            tf_compat.greater(global_step, begin_step),
            tf_compat.less(global_step, end_step),
        )
        sched_active_inclusive = tf_compat.logical_or(
            sched_active, tf_compat.logical_or(sched_start, sched_end)
        )
        sched_update = tf_compat.cond(
            tf_compat.less_equal(update_step_freq, 0),
            lambda: tf_compat.constant(True),
            lambda: tf_compat.equal(
                tf_compat.mod((global_step - begin_step), update_step_freq), 0
            ),
        )
        sched_update_ready = tf_compat.logical_or(
            tf_compat.logical_or(sched_start, sched_end), sched_update
        )

        percentage = tf_compat.minimum(
            1.0,
            tf_compat.maximum(
                0.0,
                tf_compat_div(
                    tf_compat.cast(global_step - begin_step, tf_compat.float32),
                    end_step - begin_step,
                ),
            ),
        )
        exp_percentage = 1 - tf_compat.pow(1 - percentage, exponent)
        calc_sparsity = (
            tf_compat.multiply(final_sparsity - init_sparsity, exp_percentage)
            + init_sparsity
        )

    # create the update ready tensor and sparsity tensor
    with tf_compat.name_scope(PruningScope.general(ks_group, trailing_slash=True)):
        update_ready = tf_compat.logical_and(
            sched_active_inclusive,
            sched_update_ready,
            name=PruningScope.OP_UPDATE_READY,
        )
        sparsity = tf_compat.case(
            [
                (sched_before, lambda: tf_compat.constant(0.0)),
                (sched_start, lambda: tf_compat.constant(init_sparsity)),
                (sched_active, lambda: calc_sparsity),
            ],
            default=lambda: tf_compat.constant(final_sparsity),
            name=PruningScope.OP_SPARSITY,
        )

    # add return state to collections
    tf_compat.add_to_collection(
        PruningScope.collection_name(ks_group, PruningScope.OP_UPDATE_READY),
        update_ready,
    )
    tf_compat.add_to_collection(
        PruningScope.collection_name(ks_group, PruningScope.OP_SPARSITY), sparsity
    )

    return update_ready, sparsity
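# Worked example of the interpolation above (values computed by hand), assuming
# begin_step=0, end_step=100, init_sparsity=0.05, final_sparsity=0.85, and
# exponent=3: at global_step=50, percentage = 0.5 and
# exp_percentage = 1 - (1 - 0.5) ** 3 = 0.875, giving
# sparsity = 0.05 + (0.85 - 0.05) * 0.875 = 0.75. The cubic exponent
# front-loads most of the pruning into the early steps of the schedule.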
def create_constant_op_pruning(
    op: tf_compat.Operation,
    op_input: tf_compat.Tensor,
    is_start_step: tf_compat.Tensor,
    is_end_step: tf_compat.Tensor,
    ks_group: str,
) -> PruningOpVars:
    """
    Creates PruningOpVars with a constant mask for the given operation.
    On the start step, sets the mask to be 1 for the weight tensor where
    the operation input is nonzero and 0 elsewhere.
    On the end step, reverts the mask to be all 1s and updates the weight.

    :param op: the operation to prune to the given sparsity
    :param op_input: the input tensor to op to create a constant mask for
    :param is_start_step: True only if we are at the start step
    :param is_end_step: True only if we are at the end step
    :param ks_group: the group identifier the scope should be created under
    :return: a named tuple containing the assignment op, mask variable,
        threshold tensor, and masked tensor
    """
    initial_vars = create_op_pruning_no_update(op, op_input, ks_group)
    op = initial_vars.op
    op_var_tens = initial_vars.op_input
    mask = initial_vars.mask
    masked = initial_vars.masked

    is_start_or_end_step = tf_compat.logical_or(is_start_step, is_end_step)

    def _set_constant_mask():
        # Assign the mask tensor to be 1 for all nonzero values of op_var_tens,
        # otherwise 0. On the end step, revert the mask to be all 1s
        with tf_compat.name_scope(
            PruningScope.model(
                op,
                ks_group,
                additional=PruningScope.OPS_UPDATE,
                trailing_slash=True,
            )
        ):
            new_mask = tf_compat.cond(
                is_start_step,
                lambda: tf_compat.cast(
                    tf_compat.not_equal(op_var_tens, 0.0), dtype=op_var_tens.dtype
                ),
                lambda: tf_compat.ones(op_var_tens.shape, dtype=op_var_tens.dtype),
            )
            weight_var = get_tensor_var(op_var_tens)

            return tf_compat.group(
                tf_compat.assign(mask, new_mask, name=PruningScope.OP_MASK_ASSIGN),
                tf_compat.assign(
                    weight_var, masked, name=PruningScope.OP_WEIGHT_UPDATE
                ),
            )

    def _no_op():
        with tf_compat.name_scope(
            PruningScope.model(
                op,
                ks_group,
                additional=PruningScope.OPS_UPDATE,
                trailing_slash=True,
            )
        ):
            # return no op wrapped in group to match update type
            return tf_compat.group(
                tf_compat.constant(
                    0.0,
                    dtype=op_var_tens.dtype,
                    name=PruningScope.OP_MASK_UPDATE_NO_OP,
                )
            )

    with tf_compat.name_scope(
        PruningScope.model(
            op,
            ks_group,
            additional=PruningScope.OPS_UPDATE,
            trailing_slash=True,
        )
    ):
        mask_update = tf_compat.cond(
            is_start_or_end_step,
            _set_constant_mask,
            _no_op,
            name=PruningScope.OP_MASK_UPDATE,
        )

    return PruningOpVars(op, op_var_tens, mask_update, mask, masked)