def gradient(model, feats_dir, steps, **kwargs):
    """Gather ``utils.in_synapses`` of the gradient-norm buffers per conv layer.

    For every entry of ``steps`` that is not equal to 0, the
    ``weight.grad_norm_buffer`` and ``bias.grad_norm_buffer`` features are
    loaded from the ``buffers`` group. For each layer whose name contains
    ``"conv"``, the two loaded entries for that step are handed to
    ``utils.in_synapses`` and the result is stored under that step.

    Args:
        model: Forwarded to ``utils.get_layers`` and ``utils.load_features``.
        feats_dir: Forwarded to ``utils.load_features``.
        steps: Sequence of step identifiers; entries equal to 0 are skipped.
        **kwargs: Accepted for interface compatibility; not used here.

    Returns:
        dict: ``{"empirical": {layer: {step: in_synapses_result}}}``.
    """
    conv_layers = [name for name in utils.get_layers(model) if "conv" in name]
    empirical = {name: {} for name in conv_layers}

    def load_buffer(step, suffix):
        # Single-step load from the "buffers" group for the given suffix.
        return utils.load_features(
            steps=[str(step)],
            feats_dir=feats_dir,
            model=model,
            suffix=suffix,
            group="buffers",
        )

    for step in tqdm(steps):
        if step == 0:
            continue
        weight_buffers = load_buffer(step, "weight.grad_norm_buffer")
        bias_buffers = load_buffer(step, "bias.grad_norm_buffer")
        step_key = f"step_{step}"
        for name in conv_layers:
            empirical[name][step] = utils.in_synapses(
                weight_buffers[name][step_key],
                bias_buffers[name][step_key],
            )

    return {"empirical": empirical}
def scale_momentum(model, feats_dir, steps, **kwargs):
    """Drive the momentum-aware theoretical/empirical computation over ``steps``.

    The hyperparameters ``lr``, ``wd``, ``momentum`` and ``dampening`` are read
    from ``kwargs`` and converted to extended-precision NumPy arrays. From
    them the function derives::

        denom = lr * (1 - dampening) * (1 + momentum)
        gamma = (1 - momentum) / denom
        omega = sqrt(4 * wd / denom)

    The weights and biases at ``steps[0]`` are loaded from the ``params``
    group and, together with the values above, forwarded to
    ``compute_theoretical_momentum`` for every step. ``compute_empirical`` is
    called for every step as well. Both helpers receive the result dicts and
    are expected to fill them in place (their bodies are not visible here).

    Args:
        model: Forwarded to ``utils.get_layers`` and ``utils.load_features``.
        feats_dir: Forwarded to ``utils.load_features`` and the helpers.
        steps: Non-empty sequence of step identifiers; ``steps[0]`` is used
            as the reference step.
        **kwargs: Must provide ``lr``, ``wd``, ``momentum`` and ``dampening``.

    Returns:
        dict: ``{"empirical": {layer: {...}}, "theoretical": {layer: {...}}}``
        restricted to layers whose name contains ``"conv"``.
    """
    # np.longdouble is the portable name of the extended-precision type;
    # np.float128 is only defined on some platforms and raises AttributeError
    # elsewhere. Where np.float128 exists it is the same type as np.longdouble.
    lr, wd, momentum, dampening = (
        np.array(kwargs.get(name), dtype=np.longdouble)
        for name in ("lr", "wd", "momentum", "dampening")
    )

    denom = lr * (1 - dampening) * (1 + momentum)
    gamma = (1 - momentum) / denom
    omega = np.sqrt(4 * wd / denom)

    layers = [layer for layer in utils.get_layers(model) if "conv" in layer]

    # Reference parameters at the first requested step.
    W_0 = utils.load_features(
        steps=[str(steps[0])],
        feats_dir=feats_dir,
        model=model,
        suffix="weight",
        group="params",
    )
    b_0 = utils.load_features(
        steps=[str(steps[0])],
        feats_dir=feats_dir,
        model=model,
        suffix="bias",
        group="params",
    )

    load_kwargs = {
        "model": model,
        "feats_dir": feats_dir,
    }
    theory_kwargs = {
        "lr": lr,
        "wd": wd,
        "momentum": momentum,
        "dampening": dampening,
        "gamma": gamma,
        "omega": omega,
        "W_0": W_0,
        "b_0": b_0,
        "step_0": steps[0],
    }

    theoretical = {layer: {} for layer in layers}
    empirical = {layer: {} for layer in layers}
    for i, step in enumerate(tqdm(steps)):
        theory_kwargs["i"] = i
        # The same load_kwargs dict is reused on purpose; only "group" is
        # switched between the two helper calls.
        load_kwargs["group"] = "buffers"
        compute_theoretical_momentum(
            step,
            layers,
            load_kwargs,
            theoretical,
            **theory_kwargs,
        )
        load_kwargs["group"] = "params"
        compute_empirical(step, layers, load_kwargs, empirical)

    return {"empirical": empirical, "theoretical": theoretical}
def weights_grads_full(model, feats_dir, steps, **kwargs):
    """Accumulate per-layer weights and gradients over the unique, sorted steps.

    ``steps`` is de-duplicated and sorted with ``np.unique``. The smallest
    step is skipped, and ``extract_weights_and_grads`` is called for every
    remaining step with a per-layer accumulator of the form
    ``{"weight": [], "grad": []}``. That helper is expected to append to the
    accumulator in place (its body is not visible here).

    Args:
        model: Forwarded to ``utils.get_layers`` and, via ``load_kwargs``, to
            the helper.
        feats_dir: Forwarded to the helper via ``load_kwargs``.
        steps: Array-like of step identifiers; duplicates are dropped.
        **kwargs: Forwarded unchanged to ``extract_weights_and_grads``.

    Returns:
        dict: ``{"weights": ndarray, "grads": ndarray, "steps": ndarray}``.
        ``weights`` and ``grads`` stack one array per layer in the order
        given by ``utils.get_layers``. ``steps`` is the sorted unique steps
        without the first one.
    """
    layers = list(utils.get_layers(model))
    load_kwargs = {
        "model": model,
        "feats_dir": feats_dir,
    }
    weights_and_grads = {layer: {"weight": [], "grad": []} for layer in layers}

    # np.unique already returns its result sorted, so no extra sort is needed.
    steps = np.unique(steps)
    used_steps = steps[1:]
    for step in tqdm(used_steps):
        extract_weights_and_grads(
            step, layers, load_kwargs, weights_and_grads, **kwargs
        )

    print("Allocating numpy arrays")
    # NOTE(review): stacking across layers presumably requires every layer to
    # contribute arrays of the same shape -- confirm against
    # extract_weights_and_grads.
    all_weights = [np.array(weights_and_grads[layer]["weight"]) for layer in layers]
    all_grads = [np.array(weights_and_grads[layer]["grad"]) for layer in layers]

    return {
        "weights": np.array(all_weights),
        "grads": np.array(all_grads),
        "steps": used_steps,
    }
def translation(model, feats_dir, steps, **kwargs):
    """Drive the theoretical/empirical computation for the classifier layers.

    Only layers whose name contains ``"classifier"`` are considered. The
    weights and biases at ``steps[0]`` are loaded from the ``params`` group
    and forwarded, together with ``lr`` and ``wd`` from ``kwargs``, to
    ``compute_theoretical`` for every step. ``compute_empirical`` is called
    for every step as well. Both helpers receive the result dicts and are
    expected to fill them in place (their bodies are not visible here).

    Args:
        model: Forwarded to ``utils.get_layers`` and ``utils.load_features``.
        feats_dir: Forwarded to ``utils.load_features`` and the helpers.
        steps: Non-empty sequence of step identifiers; ``steps[0]`` is used
            as the reference step.
        **kwargs: ``lr`` and ``wd`` are read with ``kwargs.get`` and passed
            through as-is (``None`` when absent).

    Returns:
        dict: ``{"empirical": {layer: {...}}, "theoretical": {layer: {...}}}``.
    """
    classifier_layers = [
        name for name in utils.get_layers(model) if "classifier" in name
    ]
    first_step = steps[0]

    def load_initial(suffix):
        # Parameters stored for the reference step, from the "params" group.
        return utils.load_features(
            steps=[str(first_step)],
            feats_dir=feats_dir,
            model=model,
            suffix=suffix,
            group="params",
        )

    initial_weights = load_initial("weight")
    initial_biases = load_initial("bias")

    load_kwargs = {"model": model, "feats_dir": feats_dir}
    theory_kwargs = {
        "lr": kwargs.get("lr"),
        "wd": kwargs.get("wd"),
        "W_0": initial_weights,
        "b_0": initial_biases,
        "step_0": first_step,
    }

    theoretical = {name: {} for name in classifier_layers}
    empirical = {name: {} for name in classifier_layers}

    for index, step in enumerate(tqdm(steps)):
        theory_kwargs["i"] = index

        # One shared load_kwargs dict; "group" is flipped between the calls.
        load_kwargs["group"] = "buffers"
        compute_theoretical(
            step, classifier_layers, load_kwargs, theoretical, **theory_kwargs
        )

        load_kwargs["group"] = "params"
        compute_empirical(step, classifier_layers, load_kwargs, empirical)

    return {"empirical": empirical, "theoretical": theoretical}
def phase(model, feats_dir, steps, **kwargs):
    """Collect per-layer position and velocity entries for the conv layers.

    ``compute_pos_vel`` is called for every step except the first entry of
    ``steps``. It receives the ``position`` and ``velocity`` dicts and is
    expected to fill them in place (its body is not visible here).

    Args:
        model: Forwarded to ``utils.get_layers`` and, via ``load_kwargs``, to
            the helper.
        feats_dir: Forwarded to the helper via ``load_kwargs``.
        steps: Sequence of step identifiers; ``steps[0]`` is skipped.
        **kwargs: Forwarded unchanged to ``compute_pos_vel``.

    Returns:
        dict: ``{"position": {layer: {...}}, "velocity": {layer: {...}}}``
        restricted to layers whose name contains ``"conv"``.
    """
    layers = [layer for layer in utils.get_layers(model) if "conv" in layer]
    load_kwargs = {
        "model": model,
        "feats_dir": feats_dir,
    }
    position = {layer: {} for layer in layers}
    velocity = {layer: {} for layer in layers}

    # The first step is skipped, matching the original range(1, len(steps)).
    for step in tqdm(steps[1:]):
        compute_pos_vel(step, layers, load_kwargs, position, velocity, **kwargs)

    return {"position": position, "velocity": velocity}
def network(model, feats_dir, steps, **kwargs):
    """Collect flattened weights and biases of every layer at every step.

    For each step the ``weight`` and ``bias`` features are loaded from the
    ``params`` group. Per layer, both are flattened and concatenated (weights
    first). If ``subset`` is given, a random selection without replacement of
    at most ``subset`` entries is kept.

    The random generator is re-created from ``seed`` at the start of every
    step, so the same indices are drawn for a given layer at every step as
    long as the layer sizes do not change.

    Args:
        model: Forwarded to ``utils.get_layers`` and ``utils.load_features``.
        feats_dir: Forwarded to ``utils.load_features``.
        steps: Iterable of step identifiers.
        **kwargs: Optional ``subset`` (maximum number of entries kept per
            layer; ``None`` keeps everything) and ``seed`` (default ``0``).

    Returns:
        dict: ``{"empirical": {layer: {step: 1-D array}}}``.
    """
    subset = kwargs.get("subset", None)
    seed = kwargs.get("seed", 0)

    layers = list(utils.get_layers(model))
    empirical = {layer: {} for layer in layers}

    for step in steps:
        weights = utils.load_features(
            steps=[str(step)],
            feats_dir=feats_dir,
            model=model,
            suffix="weight",
            group="params",
        )
        biases = utils.load_features(
            steps=[str(step)],
            feats_dir=feats_dir,
            model=model,
            suffix="bias",
            group="params",
        )

        # A local RandomState yields the same draws as np.random.seed(seed)
        # followed by np.random.choice, without reseeding the process-wide
        # generator that unrelated code may rely on.
        rng = np.random.RandomState(seed)
        step_key = f"step_{step}"
        for layer in layers:
            Wl_t = weights[layer][step_key]
            bl_t = biases[layer][step_key]
            all_weights = np.concatenate((Wl_t.reshape(-1), bl_t.reshape(-1)))
            if subset is None:
                # np.concatenate already returned a fresh array; keep it all.
                empirical[layer][step] = all_weights
            else:
                random_subset_idx = rng.choice(
                    len(all_weights),
                    size=min(subset, len(all_weights)),
                    replace=False,
                )
                empirical[layer][step] = all_weights[random_subset_idx]

    return {"empirical": empirical}