def test_testnet_checkpointall():
    """Checkpoint-all on the small "test" network must be feasible and must
    recompute nothing, i.e. its CPU cost equals the sum over all node costs."""
    keras_model = get_keras_model("test")
    graph = dfgraph_from_keras(mod=keras_model)
    assert graph.size_fwd == 6
    result = solve_checkpoint_all(graph)
    assert result.feasible
    assert result.schedule_aux_data.cpu == sum(graph.cost_cpu.values())
def test_checkmate_to_simrd_analytical_cost():
    """Smoke-test the Checkmate -> simrd conversion by running the simrd
    baseline on every known model and logging results to a file.

    Best-effort: per-model failures are logged (with traceback) and the loop
    continues with the next model.
    """
    if not have_checkmate:
        return
    test_log_filename = 'data/checkmate_simrd.log'
    batch_size = 1
    with open(test_log_filename, 'w') as test_log:
        for name in MODEL_NAMES:
            try:
                model = get_keras_model(name)
                dfg = dfgraph_from_keras(model, batch_size=batch_size, loss_cpu_cost=0,
                                         loss_ram_cost=(4 * batch_size))
                g = from_dfgraph(dfg)
                rt, result, pr = run_baseline(g.get_closure(), stats=True, trace=True)
                print('Baseline simrd results for {}:'.format(name), file=test_log)
                print(json.dumps(result, indent=2), file=test_log)
                print(file=test_log)
            # FIX: exception was bound to an unused name `e`; the formatted
            # traceback already carries the details.
            except Exception:
                print('Failed for {}:'.format(name), file=test_log)
                print(traceback.format_exc(), file=test_log)
                print(file=test_log)
    print(
        'saved Checkmate -> simrd test log to [{}]'.format(test_log_filename))
def execute_one(log_base: str, solve_strategy: SolveStrategy, model_name: str, batch_size: int,
                platform: str, input_shape=None, model_version="v1", num_runs=16,
                buffer_mem: int = 0) -> Tuple[Optional[RSResult], str, int]:
    """Evaluate stored solver results for one (model, batch size, platform) config.

    Tries each candidate result in turn and returns the first one that executes
    successfully as ``(result, result_key, throughput)``; returns
    ``(None, "", 0)`` if no candidate exists or all candidates fail.
    """
    logger = setup_logger("eval_one")
    results_and_keys = get_solutions_to_evaluate(solve_strategy, model_name, batch_size, platform,
                                                 input_shape, model_version, buffer_mem)
    if not results_and_keys:
        logger.info("No results found")
        return None, "", 0
    if not EAGER:
        tf1.disable_eager_execution()
    for result, result_key in results_and_keys:
        # Fresh graph per candidate so failed runs don't pollute later ones.
        tf.keras.backend.clear_session()
        model = get_keras_model(model_name, input_shape=input_shape)
        tf2 = TF2ExtractorParams(model, batch_size=batch_size, log_base=log_base)
        loss_fn = categorical_cross_entropy  # TODO: vgg_unet may need a different loss
        graph = tf2.g  # TODO TEST THIS VS TENSORSPEC
        runner = TF2Runner(model, graph, result.schedule, loss_fn=loss_fn, eager=EAGER,
                           log_base=log_base, batch_size=batch_size)
        try:
            throughput = evaluate_solved_model(result=result, runner=runner,
                                               warmup=10 if EAGER else 64, trials=num_runs,
                                               batch_size=batch_size)
            logger.info(
                f"Successfully executed model with predicted memory usage {result.peak_ram}, "
                f"predicted cpu {result.cpu}, actual throughput {throughput}")
            return result, result_key, throughput
        except Exception as e:
            logger.error("Error running model with predicted mem usage %s: %s", result.peak_ram, e)
            # FIX: logging `e.__traceback__` printed the traceback object's repr;
            # log the formatted traceback instead.
            logger.error("Traceback: %s", traceback.format_exc())
            logger.error("Skipping result, going to next candidate.")
    # FIX: previously returned from inside the loop on failure, so later
    # candidates were never tried despite the "going to next candidate" message.
    return None, "", 0
def test_testnet_optimalilp():
    """Optimal ILP on the small "test" network: the schedule must be feasible,
    cost no more CPU than recompute-nothing, and respect the RAM budget.

    Skips (returns early) when gurobipy is not installed.
    """
    try:
        import gurobipy as _
    except ImportError as e:
        logging.exception(e)
        logging.warning("Continuing with tests, gurobi not installed")
        return
    from remat.core.solvers.strategy_optimal_ilp import solve_ilp_gurobi
    model = get_keras_model("test")
    g = dfgraph_from_keras(mod=model)
    assert g.size_fwd == 6
    budget = sum(g.cost_ram.values()) + g.cost_ram_parameters
    scheduler_result = solve_ilp_gurobi(g, budget)
    assert scheduler_result.feasible
    assert scheduler_result.schedule_aux_data.cpu <= sum(g.cost_cpu.values())
    # FIX: activation RAM was compared against summed *CPU* costs (copy-paste
    # bug); it should be bounded by the summed per-node RAM costs.
    assert scheduler_result.schedule_aux_data.activation_ram <= sum(
        g.cost_ram.values())
    assert scheduler_result.schedule_aux_data.peak_ram <= budget
# FIX: `solve_checkpoint_all` was imported twice from the same module; the
# duplicate import is removed and the remaining imports are grouped.
from experiments.common.load_keras_model import get_keras_model
from remat.core.solvers.strategy_checkpoint_all import solve_checkpoint_all, solve_checkpoint_all_ap
from remat.core.solvers.strategy_checkpoint_last import solve_checkpoint_last_node
from remat.core.solvers.strategy_chen import solve_chen_greedy, solve_chen_sqrtn
from remat.core.solvers.strategy_griewank import solve_griewank
from remat.tensorflow2.extraction import dfgraph_from_keras

if __name__ == "__main__":
    # Solve the toy "test" network with the checkpoint-all strategy and print
    # the resulting schedule.
    model = get_keras_model("test")
    g = dfgraph_from_keras(mod=model)
    scheduler_result = solve_checkpoint_all(g)
    print(scheduler_result.schedule)
def main():
    """Profile per-layer forward/backward runtimes (plus loss) for a Keras model
    and save ``(runtimes, stds)`` as a .npy file under ``--folder``."""
    tf1.logging.set_verbosity('ERROR')
    parser = argparse.ArgumentParser()
    parser.add_argument('-n', '--model-name', default='MobileNet', choices=MODEL_NAMES)
    parser.add_argument('-b', '--batch-size', type=int, default=1)
    parser.add_argument('-s', '--input-shape', type=int, nargs='+', default=[])
    parser.add_argument('-o', '--output-file', default=None)
    parser.add_argument('-f', '--folder', default='profiles')
    parser.add_argument('-l', '--loss-function', default='softmax_cross_entropy')
    parser.add_argument('-c', '--num-runs', type=int, default=1,
                        help='Number of runs of the operator. '
                             'Increase to reduce variance')
    args = parser.parse_args()

    input_shape = args.input_shape if args.input_shape else None
    output_file = args.output_file
    model_name = args.model_name
    batch_size = args.batch_size
    if output_file is None:
        output_file = model_name + "_runtimes"
    output_file = osp.join(args.folder, output_file)

    model = get_keras_model(model_name, input_shape=input_shape)
    # FIX (security/idiom): resolve the loss function by attribute lookup rather
    # than eval() on a CLI-supplied string; unknown names raise AttributeError.
    loss_fn = getattr(tf1.losses, args.loss_function)
    print("Num layers:", len(model.layers))

    # Run the first layer a few times: on GPUs the first graph run has extra
    # one-off overhead that would otherwise skew the measurements.
    print("Dummy runs of the first layer...")
    get_exec_time_timeline(model.layers[1], batch_size, num_runs=3)

    # Profile forward pass (layer 0 is the input layer and is skipped)
    print("Profile network start...")
    forwards = [
        get_exec_time_timeline(lyr, batch_size, num_runs=args.num_runs)
        for lyr in model.layers[1:]
    ]
    forwards_times, forwards_stds = map(list, zip(*forwards))

    # Profile backward pass (in reverse layer order, matching execution order)
    backwards = [
        get_exec_time_timeline(lyr, batch_size, get_grads=True, num_runs=args.num_runs)
        for lyr in reversed(model.layers[1:])
    ]
    backwards_times, backwards_stds = map(list, zip(*backwards))

    # Profile loss
    logits_shape = (batch_size, *model.output.shape[1:])
    print("logits_shape", logits_shape, "model output shape", model.output.shape,
          "batch size", batch_size)
    loss_time, loss_std = get_exec_time_loss(loss_fn, logits_shape, num_runs=args.num_runs)

    # Assemble in schedule order: forwards, loss, backwards.
    runtimes = forwards_times + [loss_time] + backwards_times
    stds = forwards_stds + [loss_std] + backwards_stds

    print()
    for t, std, lyr in zip(forwards_times, forwards_stds, model.layers[1:]):
        print("fwd", t, "+-", std / t * 100, "%", lyr.__class__.__name__)
    for t, std, lyr in zip(backwards_times, backwards_stds, reversed(model.layers[1:])):
        print("bwd", t, "+-", std / t * 100, "%", lyr.__class__.__name__)
    print("loss", loss_time, "+-", loss_std / loss_time * 100, "%")
    print()
    np.save(output_file, (runtimes, stds))
from experiments.common.load_keras_model import MODEL_NAMES, get_keras_model

# MODEL_NAMES = ['VGG16', 'VGG19', 'MobileNet', 'fcn_8', 'pspnet', 'vgg_unet', 'unet', 'segnet', 'resnet50_segnet']

if __name__ == "__main__":
    # Print each known model's input shape; report load failures inline.
    for model_name in MODEL_NAMES:
        print(model_name, end=" ")
        try:
            keras_model = get_keras_model(model_name, input_shape=None)
            print(keras_model.layers[0].input_shape)
        except Exception as err:
            print("ERROR for model", model_name, err)
# NOTE(review): top-level fragment — `args`, `model_name`, `log_base`, `logger`,
# `SIMRD_HEURISTICS`, and `SIMRD_LIVENESS` are defined earlier in the file,
# outside this chunk.

# Build a cost model unless pure-flops costs were requested.
if args.platform == "flops":
    cost_model = None
else:
    cost_model = CostModel(model_name, args.platform, log_base, quantization=5)
    cost_model.fit()

# gen redis key
# NOTE(review): `redis_cost_key` is not used in this visible span — presumably
# consumed later in the file; verify before removing.
if cost_model is None:
    key_list = ["flops", args.batch_size]
else:
    key_list = [cost_model.platform, cost_model.quantization, args.batch_size]
redis_cost_key = "_".join(map(str, key_list))

# load model from Keras
logger.info(f"Loading model {model_name}")
model = get_keras_model(model_name, input_shape=args.input_shape)
g = dfgraph_from_keras(model, batch_size=args.batch_size, cost_model=cost_model,
                       loss_cpu_cost=0, loss_ram_cost=(4 * args.batch_size))

# Load previously computed results and the memory budgets to evaluate simrd at.
result_dict = pickle.load((log_base / 'result_dict.pickle').open('rb'))
simrd_eval_points = pickle.load((log_base / 'simrd_eval_points.pickle').open('rb'))

# Run simrd once per heuristic over the same evaluation points.
simrd_results = []
for heuristic in SIMRD_HEURISTICS:
    simrd_results.append(run_simrd(g, heuristic, simrd_eval_points, SIMRD_LIVENESS))

# save simrd results and heuristics used
pickle.dump(simrd_results, (log_base / 'simrd_results.pickle').open('wb'),
            protocol=pickle.HIGHEST_PROTOCOL)
pickle.dump(SIMRD_HEURISTICS, (log_base / 'simrd_heuristics.pickle').open('wb'),
            protocol=pickle.HIGHEST_PROTOCOL)