def run(script_corr): """Three different parsers: w_parser[('biases', i_layer)] : neural net weights/biases per layer for a single script script_parser[i_script] : weights vector for each script transform_parser[i_layer] : transform matrix (scripts x scripts) for each alphabet""" RS = RandomState((seed, "top_rs")) train_data, valid_data, tests_data = omniglot.load_data_split( [11, 2, 2], RS, num_alphabets=N_scripts) w_parser, pred_fun, loss_fun, frac_err = make_nn_funs(layer_sizes) N_weights = w_parser.vect.size uncorrelated_mat = np.eye(N_scripts) fully_correlated_mat = np.full((N_scripts, N_scripts), 1.0 / N_scripts) transform_mat = (1 - script_corr ) * uncorrelated_mat + script_corr * fully_correlated_mat transform_mat = transform_mat transform_parser = VectorParser() for i_layer in range(N_layers): if i_layer == N_layers - 1: transform_parser[i_layer] = uncorrelated_mat else: transform_parser[i_layer] = transform_mat script_parser = VectorParser() for i_script in range(N_scripts): script_parser[i_script] = np.zeros(N_weights) def transform_weights(all_z_vect, transform_vect, i_script_out): all_z = script_parser.new_vect(all_z_vect) transform = transform_parser.new_vect(transform_vect) W = OrderedDict( ) # Can't use parser because setting plain array ranges with funkyyak nodes not yet supported for k in w_parser.idxs_and_shapes.keys(): W[k] = 0.0 for i_layer in range(N_layers): script_weightings = transform[i_layer][i_script_out, :] for i_script in range(N_scripts): z_i_script = w_parser.new_vect(all_z[i_script]) script_weighting = script_weightings[i_script] W[('biases', i_layer)] += z_i_script[('biases', i_layer)] * script_weighting W[('weights', i_layer)] += z_i_script[('weights', i_layer)] * script_weighting return np.concatenate([v.ravel() for v in W.values()]) def loss_from_latents(z_vect, transform_vect, i_script, data): w_vect = transform_weights(z_vect, transform_vect, i_script) return loss_fun(w_vect, **data) def regularization(z_vect): return np.dot(z_vect, z_vect) * np.exp(log_L2_init) results = defaultdict(list) def hyperloss(transform_vect, i_hyper, record_results=False): def primal_stochastic_loss(z_vect, transform_vect, i_primal): RS = RandomState((seed, i_hyper, i_primal)) loss = 0.0 for _ in range(N_scripts_per_iter): i_script = RS.randint(N_scripts) N_train = train_data[i_script]['X'].shape[0] idxs = RS.permutation(N_train)[:batch_size] minibatch = dictslice(train_data[i_script], idxs) loss += loss_from_latents(z_vect, transform_vect, i_script, minibatch) reg = regularization(z_vect) if i_primal % 20 == 0: print "Iter {0}, loss {1}, reg {2}".format( i_primal, getval(loss), getval(reg)) print "Full losses: train: {0}, valid: {1}".format( total_loss(train_data, getval(z_vect)), total_loss(valid_data, getval(z_vect))) return loss + reg def total_loss(data, z_vect): return np.mean([ loss_from_latents(z_vect, transform_vect, i_script, data[i_script]) for i_script in range(N_scripts) ]) z_vect_0 = RS.randn( script_parser.vect.size) * np.exp(log_initialization_scale) z_vect_final = sgd(grad(primal_stochastic_loss), transform_vect, z_vect_0, alpha, beta, N_iters, callback=None) valid_loss = total_loss(valid_data, z_vect_final) if record_results: results['valid_loss'].append(valid_loss) results['train_loss'].append(total_loss(train_data, z_vect_final)) # results['tests_loss'].append(total_loss(tests_data, z_vect_final)) return valid_loss hyperloss(transform_parser.vect, 0, record_results=True) return results['train_loss'][-1], results['valid_loss'][-1]
def run(script_corr): """Three different parsers: w_parser[('biases', i_layer)] : neural net weights/biases per layer for a single script script_parser[i_script] : weights vector for each script transform_parser[i_layer] : transform matrix (scripts x scripts) for each alphabet""" RS = RandomState((seed, "top_rs")) train_data, valid_data, tests_data = omniglot.load_data_split([11, 2, 2], RS, num_alphabets=N_scripts) w_parser, pred_fun, loss_fun, frac_err = make_nn_funs(layer_sizes) N_weights = w_parser.vect.size uncorrelated_mat = np.eye(N_scripts) fully_correlated_mat = np.full((N_scripts, N_scripts), 1.0 / N_scripts) transform_mat = (1 - script_corr) * uncorrelated_mat + script_corr * fully_correlated_mat transform_parser = VectorParser() for i_layer in range(N_layers): if i_layer > 0: transform_parser[i_layer] = uncorrelated_mat else: transform_parser[i_layer] = transform_mat script_parser = VectorParser() for i_script in range(N_scripts): script_parser[i_script] = np.zeros(N_weights) def transform_weights(all_z_vect, transform_vect, i_script_out): all_z = script_parser.new_vect( all_z_vect) transform = transform_parser.new_vect(transform_vect) W = OrderedDict() # Can't use parser because setting plain array ranges with funkyyak nodes not yet supported for k in w_parser.idxs_and_shapes.keys(): W[k] = 0.0 for i_layer in range(N_layers): script_weightings = transform[i_layer][i_script_out, :] for i_script in range(N_scripts): z_i_script = w_parser.new_vect(all_z[i_script]) script_weighting = script_weightings[i_script] W[('biases', i_layer)] += z_i_script[('biases', i_layer)] * script_weighting W[('weights', i_layer)] += z_i_script[('weights', i_layer)] * script_weighting return np.concatenate([v.ravel() for v in W.values()]) def loss_from_latents(z_vect, transform_vect, i_script, data): w_vect = transform_weights(z_vect, transform_vect, i_script) return loss_fun(w_vect, **data) def regularization(z_vect): return np.dot(z_vect, z_vect) * np.exp(log_L2_init) results = defaultdict(list) def hyperloss(transform_vect, i_hyper, record_results=False): def sub_primal_stochastic_loss(z_vect, transform_vect, i_primal, i_script): RS = RandomState((seed, i_hyper, i_primal, i_script)) N_train = train_data[i_script]['X'].shape[0] idxs = RS.permutation(N_train)[:batch_size] minibatch = dictslice(train_data[i_script], idxs) loss = loss_from_latents(z_vect, transform_vect, i_script, minibatch) if i_primal % N_thin == 0 and i_script == 0: print "Iter {0}, full losses: train: {1}, valid: {2}".format( i_primal, total_loss(train_data, getval(z_vect)), total_loss(valid_data, getval(z_vect))) if i_script == 0: # Only add regularization once loss += regularization(z_vect) return loss def total_loss(data, z_vect): return np.mean([loss_from_latents(z_vect, transform_vect, i_script, data[i_script]) for i_script in range(N_scripts)]) z_vect_0 = RS.randn(script_parser.vect.size) * np.exp(log_initialization_scale) z_vect_final = sgd(grad(sub_primal_stochastic_loss), transform_vect, z_vect_0, alpha, beta, N_iters, N_scripts_per_iter, callback=None) valid_loss = total_loss(valid_data, z_vect_final) if record_results: results['valid_loss'].append(valid_loss) results['train_loss'].append(total_loss(train_data, z_vect_final)) # results['tests_loss'].append(total_loss(tests_data, z_vect_final)) return valid_loss hyperloss(transform_parser.vect, 0, record_results=True) return results['train_loss'][-1], results['valid_loss'][-1]
def run(script_corr_init): """Three different parsers: w_parser[('biases', i_layer)] : neural net weights/biases per layer for a single script script_parser[i_script] : weights vector for each script transform_parser[i_layer] : transform matrix (scripts x scripts) for each alphabet""" RS = RandomState((seed, "top_rs")) train_data, valid_data, tests_data = omniglot.load_data_split([11, 2, 2], RS, num_alphabets=N_scripts) w_parser, pred_fun, loss_fun, frac_err = make_nn_funs(layer_sizes) N_weights = w_parser.vect.size transform_parser = make_transform(N_scripts, script_corr_init) script_parser = VectorParser() for i_script in range(N_scripts): script_parser[i_script] = np.zeros(N_weights) def get_layers(vect): layers = [] for i_layer in range(N_layers): weights_by_scripts = vect.reshape((N_scripts, N_weights)) weights_idxs, _ = w_parser.idxs_and_shapes[("weights", i_layer)] biases_idxs, _ = w_parser.idxs_and_shapes[("biases", i_layer)] assert weights_idxs.stop == biases_idxs.start layer_idxs = slice(weights_idxs.start, biases_idxs.stop) layers.append(weights_by_scripts[:, layer_idxs]) return layers def transform_weights(z_vect, transform_vect): z_layers = get_layers(z_vect) transform = transform_parser.new_vect(transform_vect) w_layers = [np.dot(transform[i], z) for i, z in enumerate(z_layers)] return np.concatenate(w_layers, axis=1).ravel() def total_loss(w_vect, data): w = script_parser.new_vect(w_vect) return sum([loss_fun(w[i], **script_data) for i, script_data in enumerate(data)]) def regularization(z_vect): return np.dot(z_vect, z_vect) * np.exp(log_L2_init) results = defaultdict(list) def hyperloss(transform_vect, i_hyper, record_results=True): RS = RandomState((seed, i_hyper, "hyperloss")) def primal_loss(z_vect, transform_vect, i_primal, record_results): RS = RandomState((seed, i_hyper, i_primal, i_script)) w_vect = transform_weights(z_vect, transform_vect) loss = total_loss(w_vect, train_data) reg = regularization(z_vect) if VERBOSE and record_results and i_primal % N_thin == 0: print "Iter {0}: train: {1}, valid: {2}, reg: {3}".format( i_primal, getval(loss) / N_scripts, total_loss(getval(w_vect), valid_data) / N_scripts, getval(reg) ) return loss + reg z_vect_0 = RS.randn(script_parser.vect.size) * np.exp(log_initialization_scale) z_vect_final = sgd(grad(primal_loss), transform_vect, z_vect_0, alpha, beta, N_iters, callback=None) w_vect_final = transform_weights(z_vect_final, transform_vect) valid_loss = total_loss(w_vect_final, valid_data) if record_results: results["valid_loss"].append(getval(valid_loss) / N_scripts) results["train_loss"].append(total_loss(w_vect_final, train_data) / N_scripts) return valid_loss hyperloss(transform_parser.vect, 0) return results["train_loss"][-1], results["valid_loss"][-1]
def run(): """Three different parsers: w_parser[('biases', i_layer)] : neural net weights/biases per layer for a single script script_parser[i_script] : weights vector for each script transform_parser[i_layer] : transform matrix (scripts x scripts) for each alphabet""" RS = RandomState((seed, "top_rs")) train_data, valid_data, tests_data = omniglot.load_data_split( [11, 2, 2], RS, num_alphabets=N_scripts) w_parser, pred_fun, loss_fun, frac_err = make_nn_funs(layer_sizes) N_weights = w_parser.vect.size transform_parser = make_transform(N_scripts, script_corr_init) script_parser = VectorParser() for i_script in range(N_scripts): script_parser[i_script] = np.zeros(N_weights) def get_layers(vect): layers = [] for i_layer in range(N_layers): weights_by_scripts = vect.reshape((N_scripts, N_weights)) weights_idxs, _ = w_parser.idxs_and_shapes[('weights', i_layer)] biases_idxs, _ = w_parser.idxs_and_shapes[('biases', i_layer)] assert weights_idxs.stop == biases_idxs.start layer_idxs = slice(weights_idxs.start, biases_idxs.stop) layers.append(weights_by_scripts[:, layer_idxs]) return layers def transform_weights(z_vect, transform_vect): z_layers = get_layers(z_vect) transform = transform_parser.new_vect(transform_vect) w_layers = [np.dot(transform[i], z) for i, z in enumerate(z_layers)] return np.concatenate(w_layers, axis=1).ravel() def total_loss(w_vect, data): w = script_parser.new_vect(w_vect) return sum([loss_fun(w[i], **script_data) for i, script_data in enumerate(data)]) def regularization(z_vect): return np.dot(z_vect, z_vect) * np.exp(log_L2_init) results = defaultdict(list) def hyperloss(transform_vect, i_hyper, record_results=True): RS = RandomState((seed, i_hyper, "hyperloss")) def primal_loss(z_vect, transform_vect, i_primal, record_results=False): w_vect = transform_weights(z_vect, transform_vect) loss = total_loss(w_vect, train_data) reg = regularization(z_vect) if VERBOSE and record_results and i_primal % N_thin == 0: print "Iter {0}: train: {1}, valid: {2}, reg: {3}".format( i_primal, getval(loss) / N_scripts, total_loss(getval(w_vect), valid_data) / N_scripts, getval(reg)) return loss + reg z_vect_0 = RS.randn(script_parser.vect.size) * np.exp(log_initialization_scale) z_vect_final = sgd(grad(primal_loss), transform_vect, z_vect_0, alpha, beta, N_iters, callback=None) w_vect_final = transform_weights(z_vect_final, transform_vect) valid_loss = total_loss(w_vect_final, valid_data) if record_results: results['valid_loss'].append(getval(valid_loss) / N_scripts) results['train_loss'].append(total_loss(w_vect_final, train_data) / N_scripts) results['tests_loss'].append(total_loss(w_vect_final, tests_data) / N_scripts) return valid_loss grad_transform = grad(hyperloss)(transform_parser.vect, 0, record_results=False) for i, d in enumerate(line_search_dists): new_transform_vect = transform_parser.vect - d * grad_transform hyperloss(new_transform_vect, 0, record_results=True) print "Hyper iter {0}".format(i) print "Results", {k : v[-1] for k, v in results.iteritems()} grad_transform_dict = transform_parser.new_vect(grad_transform).as_dict() return results, grad_transform_dict