def test_fit_2():
    # Tests goodput.fit's ability to fit to data generated
    # by its own model class with arbitrary parameters, with
    # gradient accumulation. Serves as a sanity check
    # that the goodput.model fitting works in the most
    # optimistic case.
    size = (1000,)
    nodes = np.random.randint(low=1, high=11, size=size)
    replicas = np.random.randint(low=1, high=nodes + 1, size=size)
    local_bsz = np.random.randint(32, 1024, size=size)
    params = goodput.PerfParams(0.1, 0.01, 0.5, 1.0, 1e-6, 1e-6, 1.2)
    accum_step_time = goodput._predict_accum_time(params, local_bsz) + \
        np.maximum(np.random.normal(0, 0.01, size=size), 0.0)
    network_time = goodput._predict_network_time(params, nodes, replicas) + \
        np.maximum(np.random.normal(0, 0.01, size=size), 0.0)
    gamma = params.gamma
    optim_step_time = (accum_step_time**gamma + network_time**gamma)**(1 / gamma)
    result = goodput.fit_perf_params(nodes, replicas, local_bsz,
                                     accum_step_time, optim_step_time)
    loss_result = goodput._obj_fn(result, nodes, replicas, local_bsz,
                                  accum_step_time, optim_step_time)
    loss_true = goodput._obj_fn(params, nodes, replicas, local_bsz,
                                accum_step_time, optim_step_time)
    assert (abs(loss_result - loss_true) < 0.1 * loss_true
            or loss_result < loss_true), \
        ("goodput.fit failed to fit model from data generated by "
         "goodput.PerfParams(0.1, 0.01, 0.5, 1.0, 1e-6, 1e-6, 1.2), "
         "parameters: {}".format(result))
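# Illustrative sketch (not part of the original tests): the synthetic data above
# combines per-accumulation compute time and network time with a gamma-generalized
# mean, optim_time = (accum**gamma + net**gamma)**(1 / gamma). The helper below is
# hypothetical and only demonstrates that combination on scalar inputs; it assumes
# nothing about goodput's internal implementation.
def _combine_step_times_example(accum_time, network_time, gamma=1.2):
    # With gamma == 1 this reduces to a plain sum; as gamma grows, the result
    # approaches max(accum_time, network_time), modeling partial overlap of
    # computation and communication within an optimization step.
    return (accum_time ** gamma + network_time ** gamma) ** (1.0 / gamma)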
def _fit_perf_params():
    state = _metrics_state()
    # Only fit against configurations which have at least one profiled step.
    items = state.profile.items()
    items = [item for item in items if item[1]["count"] > 0]
    keys = [item[0] for item in items]
    values = [item[1] for item in items]
    # Profile keys are (num_nodes, num_replicas, local_bsz, accumulation_steps).
    num_nodes, num_replicas, local_bsz, accumulation_steps = \
        (np.array(val) for val in zip(*keys))
    # Convert accumulated totals into per-step averages.
    step_time = np.array([val["step_time"] / val["count"] for val in values])
    sync_time = np.array([val["sync_time"] / val["count"] for val in values])
    accumulation_time = np.array(
        [val["accumulation_step_time"] / val["accumulation_count"]
         if val["accumulation_count"] > 0 else 0.0 for val in values])
    compute_time = step_time - sync_time
    # If no accumulation steps were profiled for a configuration, fall back to
    # the compute time measured during optimization steps.
    accumulation_time = np.where(
        accumulation_steps > 0, accumulation_time, compute_time)
    state.perf_params = fit_perf_params(
        num_nodes, num_replicas, local_bsz, accumulation_steps,
        step_time, compute_time, accumulation_time)
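# Illustrative sketch (hypothetical data, not from the original module): the
# function above assumes state.profile maps configuration keys to accumulated
# timing counters, roughly of the following shape. Field names are taken from
# the dictionary accesses in _fit_perf_params; the concrete numbers are made up.
_example_profile = {
    # (num_nodes, num_replicas, local_bsz, accumulation_steps) -> counters
    (1, 2, 128, 0): {
        "count": 50,                    # number of profiled optimization steps
        "step_time": 12.5,              # total step time over all counted steps
        "sync_time": 2.0,               # total gradient-synchronization time
        "accumulation_step_time": 0.0,  # total time of accumulation-only steps
        "accumulation_count": 0,        # number of accumulation-only steps
    },
}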
def _fit_perf_params():
    state = _metrics_state()
    profile = {k: v for k, v in state.profile.items() if v.get("optim_count")}
    # Convert profile into numpy arrays.
    num_nodes, num_replicas, atomic_bsz = (
        np.array(k) for k in zip(*profile.keys()))
    accum_step_time = np.array([v.get("accum_step_time", 0.0)
                                for v in profile.values()])
    accum_count = np.array([v.get("accum_count", 0) for v in profile.values()])
    optim_step_time = np.array([v.get("optim_step_time", 0.0)
                                for v in profile.values()])
    optim_sync_time = np.array([v.get("optim_sync_time", 0.0)
                                for v in profile.values()])
    optim_count = np.array([v.get("optim_count", 0) for v in profile.values()])
    assert np.all(optim_count > 0)
    # Non-sync time during optimization steps should be approximately equal to
    # accumulation step time, combine those data points.
    assert np.all(optim_step_time >= optim_sync_time)
    accum_step_time += optim_step_time - optim_sync_time
    accum_count += optim_count
    accum_step_time /= accum_count
    optim_step_time /= optim_count
    state.perf_params = fit_perf_params(num_nodes, num_replicas, atomic_bsz,
                                        accum_step_time, optim_step_time)
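# Illustrative sketch (hypothetical numbers, not from the original profile):
# how the averaging above folds the non-sync portion of optimization steps into
# the accumulation-step average for a single profile entry.
_accum_step_time, _accum_count = 4.0, 4  # 4 accumulation-only steps, 4.0s total
_optim_step_time, _optim_sync_time, _optim_count = 3.0, 1.0, 2  # 2 optim steps
_accum_step_time += _optim_step_time - _optim_sync_time  # 4.0 + 2.0 = 6.0
_accum_count += _optim_count                             # 4 + 2 = 6
assert _accum_step_time / _accum_count == 1.0   # average accumulation step time
assert _optim_step_time / _optim_count == 1.5   # average optimization step time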