import tensorflow as tf

from checkmate.tf2.extraction import dfgraph_from_tf_function
from checkmate.tf2.util.load_keras_model import get_keras_model


def test_mlpblock_extract():
    vgg = get_keras_model("VGG16")

    @tf.function
    def get_grads(inputs):
        y = vgg(inputs)
        y = tf.reduce_mean(y)
        return tf.gradients(y, vgg.trainable_variables)

    # Trace the gradient function on a dummy ImageNet-sized input, then
    # extract the dataflow graph; this acts as a smoke test for extraction.
    x = tf.ones(shape=(1, 224, 224, 3), name="input")
    grad_conc = get_grads.get_concrete_function(x)
    g = dfgraph_from_tf_function(grad_conc)
    assert g is not None
def get_testnet_graph():
    import tensorflow as tf

    from checkmate.tf2.extraction import dfgraph_from_tf_function
    from checkmate.tf2.util.load_keras_model import get_keras_model

    model = get_keras_model("test")

    @tf.function
    def get_grads(inputs):
        y = model(inputs)
        y = tf.reduce_mean(y)
        return tf.gradients(y, model.trainable_variables)

    x = tf.ones(shape=(1, 224, 224, 3), name="input")
    grad_conc = get_grads.get_concrete_function(x)
    return dfgraph_from_tf_function(grad_conc)
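# A minimal usage sketch: the DFGraph returned by get_testnet_graph feeds
# directly into a checkpointing solver. The solver import path below is an
# assumption for illustration; the call pattern solve_chen_sqrtn(g, True)
# matches how the solver is invoked elsewhere in this repo.
def example_schedule_testnet():
    from checkmate.core.solvers.strategy_chen import solve_chen_sqrtn  # assumed import path

    g = get_testnet_graph()
    sched_result = solve_chen_sqrtn(g, True)
    return sched_result.schedule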
def _build_model_via_solver(dataset: str, model_name: str, train_signature, solver):
    logging.info("Configuring model %s", model_name)
    model_check = make_model(dataset, model_name)
    loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
    optimizer = tf.keras.optimizers.Adam()
    train_loss = tf.keras.metrics.Mean(name="train_loss")
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name="train_accuracy")
    test_loss = tf.keras.metrics.Mean(name="test_loss")
    test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name="test_accuracy")

    logging.info("Building checkpointed model via checkmate")

    @tf.function(input_signature=train_signature)
    def grads_check(images, labels):
        with tf.GradientTape() as check_tape:
            predictions = model_check(images)
            loss = loss_object(labels, predictions)
        gradients = check_tape.gradient(loss, model_check.trainable_variables)
        train_loss(loss)
        train_accuracy(labels, predictions)
        return gradients

    # Extract the dataflow graph from the traced gradient function, solve for a
    # recomputation schedule, and rewrite the graph to follow that schedule.
    fn = grads_check.get_concrete_function()
    g = dfgraph_from_tf_function(fn)
    sqrtn_fn = edit_graph(fn, g.op_dict, solver(g).schedule)

    @tf.function
    def train_step_check(images, labels):
        gradients = sqrtn_fn(images, labels)
        optimizer.apply_gradients(zip(gradients, model_check.trainable_variables))

    @tf.function
    def test_step_check(images, labels):
        predictions = model_check(images)
        t_loss = loss_object(labels, predictions)
        test_loss(t_loss)
        test_accuracy(labels, predictions)

    return sqrtn_fn, train_step_check, test_step_check, train_loss, train_accuracy, test_loss, test_accuracy
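# Usage sketch for _build_model_via_solver (hedged: the "mnist"/"test" names
# accepted by make_model, the input signature, and the solver import path below
# are illustrative assumptions). Any callable mapping a DFGraph to a scheduled
# result can serve as the solver argument.
def example_build_and_train_one_step():
    from checkmate.core.solvers.strategy_chen import solve_chen_sqrtn  # assumed import path

    train_signature = [
        tf.TensorSpec(shape=(None, 28, 28, 1), dtype=tf.float32),
        tf.TensorSpec(shape=(None,), dtype=tf.int64),
    ]

    def solver(g):
        return solve_chen_sqrtn(g, True)  # same call pattern as used elsewhere in this repo

    (sqrtn_fn, train_step, test_step,
     train_loss, train_acc, test_loss, test_acc) = _build_model_via_solver("mnist", "test", train_signature, solver)

    # One training step on random data, just to exercise the rewritten graph.
    images = tf.random.uniform((8, 28, 28, 1))
    labels = tf.random.uniform((8,), maxval=10, dtype=tf.int64)
    train_step(images, labels)
    logging.info("train loss after one step: %f", train_loss.result())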
def compile_tf2(
    model: tf.keras.Model, loss, optimizer, input_spec=None, label_spec=None, scheduler=solver, budget="auto", **kwargs
):
    """
    Checkmate optimizes your DNN graphs to consume less GPU memory. Call this function using a tf.function

    :param model: a keras Model to optimize
    :param loss: loss function to use when training
    :param optimizer: optimizer used to apply the recomputed gradients
    :param input_spec: tf.TensorSpec list that corresponds to model inputs
    :param label_spec: tf.TensorSpec that corresponds to the label shape
    :param scheduler: callable that maps a DFGraph and budget to a scheduled result
    :param budget: memory budget in bytes, or "auto" to query the GPU's memory
    """
    set_opts()

    # set input, output shapes
    if model.input_spec is None and input_spec is None:
        raise ValueError(
            "Keras model has not been compiled yet! If model.input_spec is not defined, then the input_spec "
            "parameter must be set in the call to checkmate.tensorflow2.compile."
        )
    if label_spec is None:
        raise ValueError(
            "Checkmate needs the shape of the label in order to calculate the size of all operations. Pass in "
            "an example input or tf.TensorSpec object representing the shape of the label."
        )
    input_spec = model.input_spec if input_spec is None else input_spec

    # query budget if not specified
    if budget == "auto":
        budget = _get_gpu_memory_bytes()

    # build gradient function for model
    @tf.function
    def grads_check(data, label):
        with tf.GradientTape() as check_tape:
            predictions = model(data)
            loss_val = loss(label, predictions)
        gradients = check_tape.gradient(loss_val, model.trainable_variables)
        return predictions, loss_val, gradients

    fn = grads_check.get_concrete_function(input_spec, label_spec)
    g = dfgraph_from_tf_function(fn)

    # choose solver and calculate schedule
    sched_result = scheduler(g, budget, **kwargs)
    if not sched_result.feasible:
        logging.error(
            "[checkmate] Checkmate solver could find no feasible schedule for the specified budget of {}".format(budget)
        )
        raise ValueError("No feasible solution for specified budget of {}".format(budget))
    logging.debug("[checkmate] Schedule solved")

    # create recomputed gradient function; relax the batch dimension so the
    # rewritten function accepts any batch size
    def clean_bs(tensorspec):
        newshape = list(tensorspec.shape)
        newshape[0] = None
        return tf.TensorSpec(shape=newshape, dtype=tensorspec.dtype)

    fn_nobatchsize = grads_check.get_concrete_function(clean_bs(input_spec), clean_bs(label_spec))
    grad_fn_check = edit_graph(fn_nobatchsize, g.op_dict, sched_result.schedule)

    @tf.function
    def train_step_check(data, labels):
        predictions, loss_val, gradients = grad_fn_check(data, labels)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
        return predictions, loss_val

    return train_step_check
def compile_tf2(
    model: tf.keras.Model,
    loss: tf.losses.Loss,
    optimizer: tf.optimizers.Optimizer,
    input_spec=None,
    label_spec=None,
    budget="auto",
):
    """
    Checkmate optimizes your DNN graphs to consume less GPU memory. Call this function using a tf.function

    :param model: a keras Model to optimize
    :param loss: loss function to use when training
    :param optimizer: optimizer used to apply the recomputed gradients
    :param input_spec: tf.TensorSpec list that corresponds to model inputs
    :param label_spec: tf.TensorSpec that corresponds to the label shape
    :param budget: memory budget in MB, or "auto" to choose from available GPU or system memory
    """
    # set input, output shapes
    if model.input_spec is None and input_spec is None:
        raise ValueError(
            "Keras model has not been compiled yet! If model.input_spec is not defined, then the input_spec "
            "parameter must be set in the call to checkmate.tensorflow2.compile."
        )
    if label_spec is None:
        raise ValueError(
            "Checkmate needs the shape of the label in order to calculate the size of all operations. Pass in "
            "an example input or tf.TensorSpec object representing the shape of the label."
        )
    input_spec = model.input_spec if input_spec is None else input_spec

    # query budget if not specified
    if budget == "auto":
        if _using_gpu_check():
            # choose based on available GPU RAM
            gpu_ram = nvidiasmi_query("memory.total")
            budget = min(gpu_ram.values()) * 0.9
            logging.info(
                "[checkmate] No budget specified; defaulting to the minimum amount of total GPU RAM on any single "
                "GPU, {0:.2f}MB".format(budget)
            )
        else:
            # choose based on available system memory
            budget = psutil.virtual_memory().available * 0.8 / 1000000
            logging.debug("[checkmate] No GPU detected, using system DRAM on CPU")
            logging.info("[checkmate] No budget specified; defaulting to {0:.2f}MB".format(budget))

    # build gradient function for model
    @tf.function
    def grads_check(data, label):
        with tf.GradientTape() as check_tape:
            predictions = model(data)
            loss_val = loss(label, predictions)
        gradients = check_tape.gradient(loss_val, model.trainable_variables)
        return predictions, loss_val, gradients

    fn = grads_check.get_concrete_function(input_spec, label_spec)
    g = dfgraph_from_tf_function(fn)

    # choose solver and calculate schedule
    logging.warning(
        "[checkmate] At the moment, Checkmate does not guarantee scheduling under the specified budget. "
        "This feature will appear soon."
    )
    logging.debug("[checkmate] Solving for recomputation schedule, may take a while")
    logging.debug("[checkmate] Using Chen et al. (2016) sqrt(n) algorithm")
    sched_result = solve_chen_sqrtn(g, True)
    logging.debug("[checkmate] Schedule solved")

    # create recomputed gradient function; relax the batch dimension so the
    # rewritten function accepts any batch size
    def clean_bs(tensorspec):
        newshape = list(tensorspec.shape)
        newshape[0] = None
        return tf.TensorSpec(shape=newshape, dtype=tensorspec.dtype)

    fn_nobatchsize = grads_check.get_concrete_function(clean_bs(input_spec), clean_bs(label_spec))
    grad_fn_check = edit_graph(fn_nobatchsize, g.op_dict, sched_result.schedule)

    @tf.function
    def train_step_check(data, labels):
        predictions, loss_val, gradients = grad_fn_check(data, labels)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
        return predictions, loss_val

    return train_step_check
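# Usage sketch for compile_tf2 (a minimal example under assumptions: the model
# name, batch size, and random data below are illustrative, and get_keras_model
# is assumed to be importable here as in the rest of this repo).
def example_compile_and_train():
    model = get_keras_model("VGG16")
    train_step = compile_tf2(
        model,
        loss=tf.losses.CategoricalCrossentropy(),
        optimizer=tf.optimizers.SGD(),
        input_spec=tf.TensorSpec(shape=(32, 224, 224, 3)),
        label_spec=tf.TensorSpec(shape=(32, 1000)),
    )
    # One step on random data to exercise the recomputing train step.
    images = tf.random.uniform((32, 224, 224, 3))
    labels = tf.one_hot(tf.random.uniform((32,), maxval=1000, dtype=tf.int32), depth=1000)
    predictions, loss_val = train_step(images, labels)
    return float(loss_val)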
if __name__ == "__main__":
    logging.basicConfig(level=logging.DEBUG)

    logging.info("building graph")
    with Timer("build_graph", print_results=True):
        model = get_keras_model("ResNet50")

        def grads(images, labels):
            with tf.GradientTape() as tape:
                pred = model(images)
                loss = tf.reduce_mean(pred - labels)
            gradient = tape.gradient(loss, model.trainable_variables)
            return loss, gradient

        grad_fn = tf.function(grads).get_concrete_function(
            tf.TensorSpec(shape=(BS, 224, 224, 3)), tf.TensorSpec(shape=(BS, 1000))
        )

    logging.info("tracing graph")
    with Timer("trace_graph", print_results=True):
        g = dfgraph_from_tf_function(grad_fn)

    # sched_result = solve_ilp_gurobi(g, budget=platform_memory("p2xlarge"), approx=False, eps_noise=0.0)
    # sched_result = solve_approx_lp_deterministic_05_threshold(g, budget=platform_memory("p2xlarge"))
    logging.info("solving graph")
    with Timer("sched_graph", print_results=True):
        sched_result = solve_chen_sqrtn(g, True)

    # logging.info("rebuilding graph")
    # new_graph = edit_graph(grad_fn, g.op_dict, sched_result.schedule)
    # plot_path = checkmate_data_dir() / "exec"
    # plot_path.mkdir(parents=True, exist_ok=True)
    # plot_schedule(sched_result, save_file=plot_path / "optimal_vgg16.png")