    maze_sps = None
else:
    raise NotImplementedError

limit_low = 0
if args.encoding_limit != 0.0:
    limit_high = args.encoding_limit
else:
    limit_high = data['coarse_mazes'].shape[2]

    # modify the encoding limit to account for all of the environments
    if args.tile_mazes:
        limit_high *= int(np.ceil(np.sqrt(args.n_mazes)))

encoding_func, repr_dim = get_encoding_function(args, limit_low=limit_low, limit_high=limit_high)

# TODO: create a way to cache the validation set, so it doesn't need to be remade every time for multiple runs.
#       have an init flag to use the cache, rather than pickle the entire object
#       check whether the cache exists before generating the object
# if args.use_cache == 1:
#     dataset_name = args.dataset_dir.split('/')[-1]
#     cache_fname = 'validation_set_cache/{}_{}_dim{}_{}mazes_{}goals_id_{}_seed{}.npz'.format(
#         dataset_name,
#         args.spatial_encoding,
#         args.dim,
#         args.n_mazes_tested,
#         args.n_goals_tested,
#         args.maze_id_type,
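# Hedged sketch for the TODO above (not part of the original script): a helper that checks for
# a cached validation set on disk and only regenerates it when the cache file is missing.
# The name `load_or_build_validation_cache`, the `build_fn` callable, and the assumption that
# `os` is imported in this file are illustrative, not taken from the repo.
def load_or_build_validation_cache(cache_fname, build_fn):
    """Return a dict of arrays, loading from `cache_fname` when it exists.

    `build_fn` is a zero-argument callable returning a dict of numpy arrays
    (e.g. the tensors that make up the validation set); when the cache file is
    missing, the result is built once and saved for later runs.
    """
    if os.path.exists(cache_fname):
        data = np.load(cache_fname)
        return {key: data[key] for key in data.files}
    arrays = build_fn()
    cache_dir = os.path.dirname(cache_fname)
    if cache_dir != '' and not os.path.exists(cache_dir):
        os.makedirs(cache_dir)
    np.savez(cache_fname, **arrays)
    return arrays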
parser.add_argument('--n-tiles', type=int, default=8, help='number of layers for tile coding')
parser.add_argument('--n-bins', type=int, default=8, help='number of bins for tile coding')
parser.add_argument('--ssp-scaling', type=float, default=1.0)
parser.add_argument('--dim', type=int, default=512)
parser.add_argument('--seed', type=int, default=13)

args = parser.parse_args()

encoding_func, dim = get_encoding_function(args, limit_low=args.limit_low, limit_high=args.limit_high)

env = Environment(encoding_func=encoding_func, limit_low=args.limit_low, limit_high=args.limit_high)

# encoding specific string
encoding_specific = ''
if args.spatial_encoding == 'ssp':
    encoding_specific = args.ssp_scaling
elif args.spatial_encoding == 'frozen-learned':
    encoding_specific = args.frozen_model
elif args.spatial_encoding == 'pc-gauss' or args.spatial_encoding == 'pc-gauss-softmax':
    encoding_specific = args.pc_gauss_sigma
elif args.spatial_encoding == 'pc-dog':
    heatmaps = data['heatmaps']
    normalized_heatmaps = data['normalized_heatmaps']
else:
    print("Generating Data")

    heatmaps = np.zeros((len(encodings), args.n_points, args.res, args.res))
    normalized_heatmaps = np.zeros((len(encodings), args.n_points, args.res, args.res))

    points = np.random.uniform(low=args.limit_low, high=args.limit_high, size=(args.n_points, 2))

    for ei, encoding in enumerate(encodings):
        args.spatial_encoding = encoding

        encoding_func, repr_dim = get_encoding_function(
            args, limit_low=args.train_limit_low, limit_high=args.train_limit_high)

        # # input is maze, loc, goal ssps, output is 2D direction to move
        # if 'learned' in args.spatial_encoding:
        #     enc_func = encoding_func_from_model(args.model, args.dim)
        #
        #     def encoding_func(x, y):
        #         return enc_func(np.array([x, y]))
        # else:
        #     pass

        activations = np.zeros((args.res, args.res, args.dim))
        normalized_activations = np.zeros((args.res, args.res, args.dim))
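        # Hedged sketch, not from the original file: the two grids above are presumably
        # filled by evaluating the encoding over the res x res grid and L2-normalizing
        # each vector, along the lines of:
        #
        #     xs = np.linspace(args.limit_low, args.limit_high, args.res)
        #     ys = np.linspace(args.limit_low, args.limit_high, args.res)
        #     for i, x in enumerate(xs):
        #         for j, y in enumerate(ys):
        #             activations[i, j, :] = encoding_func(x, y)
        #             norm = np.linalg.norm(activations[i, j, :])
        #             if norm > 0:
        #                 normalized_activations[i, j, :] = activations[i, j, :] / norm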
parser.add_argument('--n-items', type=int, default=7)
parser.add_argument('--sigma', type=float, default=0.005, help='additional gaussian noise to add')

parser = add_encoding_params(parser)

args = parser.parse_args()

args.spatial_encoding = 'sub-toroid-ssp'
args.dim = 256
args.new_dataset = True

enc_func, repr_dim = get_encoding_function(args, limit_low=-args.limit, limit_high=args.limit)

X = spa.SemanticPointer(data=enc_func(1, 0))
Y = spa.SemanticPointer(data=enc_func(0, 1))

xs = np.linspace(-args.limit, args.limit, args.res)
ys = np.linspace(-args.limit, args.limit, args.res)

# heatmap_vectors = get_heatmap_vectors(xs, ys, X, Y)
heatmap_vectors = get_encoding_heatmap_vectors(xs, ys, repr_dim, enc_func, normalize=False)
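# Hedged usage sketch (not from the original script): heatmap_vectors can be used to decode a
# 2D location from an encoded vector by computing the similarity against every grid cell and
# reading off the argmax. `query_vec` is a hypothetical (repr_dim,) vector, e.g. a noisy SSP.
#
#     sim = np.tensordot(heatmap_vectors, query_vec, axes=([2], [0]))  # (res, res) similarity map
#     i, j = np.unravel_index(np.argmax(sim), sim.shape)
#     decoded_x, decoded_y = xs[i], ys[j]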
def main():
    parser = argparse.ArgumentParser('Train a network to clean up a noisy spatial semantic pointer')

    parser.add_argument('--loss-function', type=str, default='cosine',
                        choices=['cosine', 'mse', 'combined', 'scaled'])
    parser.add_argument('--noise-type', type=str, default='memory', choices=['memory', 'gaussian', 'both'])
    parser.add_argument('--sigma', type=float, default=0.005,
                        help='sigma on the gaussian noise if noise-type==gaussian')
    parser.add_argument('--spatial-encoding', type=str, default='sub-toroid-ssp',
                        choices=[
                            'ssp', 'hex-ssp', 'random', '2d', '2d-normalized', 'one-hot', 'hex-trig',
                            'sub-toroid-ssp', 'var-sub-toroid-ssp', 'proj-ssp', 'orth-proj-ssp',
                            'trig', 'random-trig', 'random-proj', 'learned', 'frozen-learned',
                            'pc-gauss', 'pc-dog', 'tile-coding'
                        ],
                        help='coordinate encoding')
    parser.add_argument('--hex-freq-coef', type=float, default=2.5,
                        help='constant to scale frequencies by for hex-trig')
    parser.add_argument('--pc-gauss-sigma', type=float, default=0.25, help='sigma for the gaussians')
    parser.add_argument('--pc-diff-sigma', type=float, default=0.5,
                        help='sigma for subtracted gaussian in DoG')
    parser.add_argument('--n-tiles', type=int, default=8, help='number of layers for tile coding')
    parser.add_argument('--n-bins', type=int, default=0, help='number of bins for tile coding')
    parser.add_argument('--ssp-scaling', type=float, default=1.0)
    parser.add_argument('--phi', type=float, default=0.5, help='phi as a fraction of pi for orth-proj-ssp')
    parser.add_argument('--n-proj', type=int, default=3, help='projection dimension for sub toroids')
    parser.add_argument('--scale-ratio', type=float, default=0, help='ratio between sub toroid scales')
    parser.add_argument('--val-period', type=int, default=10,
                        help='number of epochs before a test/validation set run')
    parser.add_argument('--train-fraction', type=float, default=.5,
                        help='proportion of the dataset to use for training')
    parser.add_argument('--n-samples', type=int, default=10000,
                        help='Number of memories to generate. Total samples will be n-samples * n-items')
    parser.add_argument('--n-items', type=int, default=12,
                        help='number of items in memory. Proxy for noisiness')
    parser.add_argument('--dim', type=int, default=256, help='Dimensionality of the semantic pointers')
    parser.add_argument('--hidden-size', type=int, default=512, help='Hidden size of the cleanup network')
    # parser.add_argument('--limits', type=str, default="0,13,0,13", help='The limits of the space')
    parser.add_argument('--limits', type=str, default="-5,5,-5,5", help='The limits of the space')
    parser.add_argument('--epochs', type=int, default=50)
    parser.add_argument('--batch-size', type=int, default=32)
    parser.add_argument('--lr', type=float, default=0.001)
    parser.add_argument('--momentum', type=float, default=0.9)
    parser.add_argument('--seed', type=int, default=13)
    parser.add_argument('--logdir', type=str, default='trained_models/ssp_cleanup',
                        help='Directory for saved model and tensorboard log')
    parser.add_argument('--load-model', type=str, default='',
                        help='Optional model to continue training from')
    # parser.add_argument('--name', type=str, default='',
    #                     help='Name of output folder within logdir. Will use current date and time if blank')
    parser.add_argument('--weight-histogram', action='store_true',
                        help='Save histograms of the weights if set')
    parser.add_argument('--optimizer', type=str, default='adam', choices=['rmsprop', 'adam', 'sgd'])
    parser.add_argument('--dataset-seed', type=int, default=14)

    args = parser.parse_args()

    args.limits = tuple(float(v) for v in args.limits.split(','))

    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    dataset_name = 'data/ssp_cleanup_dataset_dim{}_seed{}_items{}_samples{}.npz'.format(
        args.dim, args.seed, args.n_items, args.n_samples
    )

    if not os.path.exists('data'):
        os.makedirs('data')

    rng = np.random.RandomState(seed=args.seed)
    # x_axis_sp = make_good_unitary(args.dim, rng=rng)
    # y_axis_sp = make_good_unitary(args.dim, rng=rng)

    limit_low = args.limits[0]
    limit_high = args.limits[1]

    encoding_func, repr_dim = get_encoding_function(args, limit_low=limit_low, limit_high=limit_high)

    if os.path.exists(dataset_name):
        print("Loading dataset")
        data = np.load(dataset_name)
        clean_ssps = data['clean_ssps']
        noisy_ssps = data['noisy_ssps']
    else:
        print("Generating SSP cleanup dataset")
        clean_ssps, noisy_ssps, coords = generate_cleanup_dataset(
            # x_axis_sp=x_axis_sp,
            # y_axis_sp=y_axis_sp,
            encoding_func=encoding_func,
            n_samples=args.n_samples,
            dim=args.dim,
            n_items=args.n_items,
            limits=args.limits,
            seed=args.dataset_seed,
        )
        print("Dataset generation complete. Saving dataset")
        np.savez(
            dataset_name,
            clean_ssps=clean_ssps,
            noisy_ssps=noisy_ssps,
            coords=coords,
            # x_axis_vec=x_axis_sp.v,
            # y_axis_vec=y_axis_sp.v,
        )

    # Add gaussian noise if required
    if args.noise_type == 'gaussian':
        noisy_ssps = clean_ssps + np.random.normal(loc=0, scale=args.sigma, size=noisy_ssps.shape)
    elif args.noise_type == 'both':
        noisy_ssps += np.random.normal(loc=0, scale=args.sigma, size=noisy_ssps.shape)

    n_samples = clean_ssps.shape[0]
    n_train = int(args.train_fraction * n_samples)
    n_test = n_samples - n_train
    assert(n_train > 0 and n_test > 0)
    train_clean = clean_ssps[:n_train, :]
    train_noisy = noisy_ssps[:n_train, :]
    test_clean = clean_ssps[n_train:, :]
    test_noisy = noisy_ssps[n_train:, :]

    dataset_train = GenericDataset(inputs=train_noisy, outputs=train_clean)
    dataset_test = GenericDataset(inputs=test_noisy, outputs=test_clean)

    trainloader = torch.utils.data.DataLoader(
        dataset_train, batch_size=args.batch_size, shuffle=True, num_workers=0,
    )

    # For testing just do everything in one giant batch
    testloader = torch.utils.data.DataLoader(
        dataset_test, batch_size=len(dataset_test), shuffle=False, num_workers=0,
    )

    model = FeedForward(input_size=args.dim, hidden_size=args.hidden_size, output_size=args.dim)

    # Open a tensorboard writer if a logging directory is given
    if args.logdir != '':
        current_time = datetime.now().strftime('%b%d_%H-%M-%S')
        save_dir = osp.join(args.logdir, current_time)
        writer = SummaryWriter(log_dir=save_dir)
        if args.weight_histogram:
            # Log the initial parameters
            for name, param in model.named_parameters():
                writer.add_histogram('parameters/' + name, param.clone().cpu().data.numpy(), 0)

    mse_criterion = nn.MSELoss()
    cosine_criterion = nn.CosineEmbeddingLoss()

    if args.optimizer == 'sgd':
        optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)
    elif args.optimizer == 'rmsprop':
        optimizer = torch.optim.RMSprop(model.parameters(), lr=args.lr, momentum=args.momentum)
    elif args.optimizer == 'adam':
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    for e in range(args.epochs):
        print('Epoch: {0}'.format(e + 1))

        if e % args.val_period == 0:
            run_evaluation(
                testloader, model, writer, e,
                mse_criterion, cosine_criterion,
                name='test'
            )

        avg_mse_loss = 0
        avg_cosine_loss = 0
        avg_combined_loss = 0
        n_batches = 0
        for i, data in enumerate(trainloader):
            noisy, clean = data

            if noisy.size()[0] != args.batch_size:
                continue  # Drop data, not enough for a batch

            optimizer.zero_grad()

            outputs = model(noisy)

            mse_loss = mse_criterion(outputs, clean)
            # Modified to use CosineEmbeddingLoss
            cosine_loss = cosine_criterion(outputs, clean, torch.ones(args.batch_size))
            # print(loss.data.item())
            loss = cosine_loss + mse_loss

            if args.loss_function == 'cosine':
                cosine_loss.backward()
            elif args.loss_function == 'mse':
                mse_loss.backward()
            elif args.loss_function == 'combined':
                loss.backward()

            avg_cosine_loss += cosine_loss.data.item()
            avg_mse_loss += mse_loss.data.item()
            avg_combined_loss += loss.data.item()
            n_batches += 1

            optimizer.step()

        print(avg_cosine_loss / n_batches)

        if args.logdir != '':
            if n_batches > 0:
                avg_cosine_loss /= n_batches
                avg_mse_loss /= n_batches
                avg_combined_loss /= n_batches
                writer.add_scalar('avg_cosine_loss', avg_cosine_loss, e + 1)
                writer.add_scalar('avg_mse_loss', avg_mse_loss, e + 1)
                writer.add_scalar('avg_combined_loss', avg_combined_loss, e + 1)

            if args.weight_histogram and (e + 1) % 10 == 0:
                for name, param in model.named_parameters():
                    writer.add_histogram('parameters/' + name, param.clone().cpu().data.numpy(), e + 1)

    print("Testing")

    run_evaluation(testloader, model, writer, e, mse_criterion, cosine_criterion, name='test')

    # with torch.no_grad():
    #
    #     # Everything is in one batch, so this loop will only happen once
    #     for i, data in enumerate(testloader):
    #
    #         noisy, clean = data
    #
    #         outputs = model(noisy)
    #
    #         mse_loss = mse_criterion(outputs, clean)
    #         # Modified to use CosineEmbeddingLoss
    #         cosine_loss = cosine_criterion(outputs, clean, torch.ones(len(dataset_test)))
    #
    #         print(cosine_loss.data.item())
    #
    #         if args.logdir != '':
    #             # TODO: get a visualization of the performance
    #             writer.add_scalar('test_cosine_loss', cosine_loss.data.item())
    #             writer.add_scalar('test_mse_loss', mse_loss.data.item())

    # Close tensorboard writer
    if args.logdir != '':
        writer.close()

        torch.save(model.state_dict(), osp.join(save_dir, 'model.pt'))

        params = vars(args)
        # # Additionally save the axis vectors used
        # params['x_axis_vec'] = list(x_axis_sp.v)
        # params['y_axis_vec'] = list(y_axis_sp.v)
        with open(osp.join(save_dir, "params.json"), "w") as f:
            json.dump(params, f)
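# Hedged usage sketch (the script filename below is an assumption, not taken from the repo):
#
#     python train_ssp_cleanup.py --spatial-encoding ssp --dim 256 --n-items 12 \
#         --loss-function cosine --epochs 50 --logdir trained_models/ssp_cleanup
#
# Every flag shown corresponds to an argument defined in main(); defaults are used for
# anything omitted.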
n_noise_levels = len(noise_levels)
# n_dimensions = len(dimensions)

df = pd.DataFrame()

for config in configs:
    print(config.spatial_encoding)
    print('Limit: {}'.format(config.limit_high))

    xs = np.linspace(config.limit_low, config.limit_high, args.res)
    ys = np.linspace(config.limit_low, config.limit_high, args.res)

    encoding_func, repr_dim = get_encoding_function(
        # config, limit_low=args.train_limit_low, limit_high=args.train_limit_high
        config, limit_low=config.limit_low, limit_high=config.limit_high)

    heatmap_vectors = np.zeros((len(xs), len(ys), config.dim))
    flat_heatmap_vectors = np.zeros((len(xs) * len(ys), config.dim))

    for i, x in enumerate(xs):
        for j, y in enumerate(ys):
            heatmap_vectors[i, j, :] = encoding_func(x, y)
            flat_heatmap_vectors[i * len(ys) + j, :] = heatmap_vectors[i, j, :].copy()

            # Normalize. This is required for frozen-learned to work
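            # Hedged sketch, not from the original file: the normalization referred to above is
            # presumably an L2 normalization of each heatmap vector, along the lines of:
            #
            #     norm = np.linalg.norm(heatmap_vectors[i, j, :])
            #     if norm > 0:
            #         heatmap_vectors[i, j, :] /= norm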