def test_separate_networks(
        configs: dict[str, Any],
        make_plots: bool = True,
        **kwargs,
) -> TestOutputs:
    """Test training with separate networks for the x and v updates.

    Trains with ``dynamics_config['separate_networks'] = True``, then (on
    rank 0 only) runs inference from the trained state.

    Args:
        configs: Base configuration dict; deep-copied, never mutated.
        make_plots: Whether to generate diagnostic plots.
        **kwargs: Forwarded to ``train``.

    Returns:
        TestOutputs(train_out, run_out); ``run_out`` is None on ranks != 0.
    """
    t0 = time.time()
    logger.info('Testing separate networks')
    # Deep copy so the caller's configs are left untouched
    # (deepcopy of a dict already returns a dict; no extra dict() needed).
    configs_ = copy.deepcopy(configs)
    configs_['dynamics_config']['separate_networks'] = True
    train_out = train(configs_, make_plots=make_plots,
                      verbose=False, num_chains=4, **kwargs)
    x = train_out.x
    dynamics = train_out.dynamics
    logdir = train_out.logdir
    runs_dir = os.path.join(logdir, 'inference')
    run_out = None
    if RANK == 0:  # only the chief rank runs inference
        run_out = run(dynamics, configs_, x=x,
                      runs_dir=runs_dir, make_plots=make_plots)
    logger.info(f'Passed! Took: {time.time() - t0:.4f} seconds')
    return TestOutputs(train_out, run_out)
def test_resume_training(
        configs: dict[str, Any],
        make_plots: bool = True,
        **kwargs,
) -> TestOutputs:
    """Test restoring a training session from a checkpoint.

    Requires ``configs['restore_from']`` to be set; trains from the restored
    state, then (on rank 0 only) runs inference.

    Args:
        configs: Base configuration dict; deep-copied, never mutated.
            Must contain a non-None ``restore_from`` entry.
        make_plots: Whether to generate diagnostic plots.
        **kwargs: Forwarded to ``train``.

    Returns:
        TestOutputs(train_out, run_out); ``run_out`` is None on ranks != 0.

    Raises:
        ValueError: If ``restore_from`` is missing (was a bare ``assert``,
            which would be stripped under ``python -O``).
    """
    t0 = time.time()
    logger.info('Testing resuming training')
    configs_ = copy.deepcopy(configs)
    if configs_.get('restore_from', None) is None:
        raise ValueError('`restore_from` must be set to resume training')
    train_out = train(configs_, make_plots=make_plots,
                      verbose=False, num_chains=4, **kwargs)
    dynamics = train_out.dynamics
    logdir = train_out.logdir
    x = train_out.x
    runs_dir = os.path.join(logdir, 'inference')
    run_out = None
    if RANK == 0:  # only the chief rank runs inference
        # Forward make_plots (previously accepted but dropped here),
        # matching the sibling test helpers.
        run_out = run(dynamics, configs_, x=x,
                      runs_dir=runs_dir, make_plots=make_plots)
    logger.info(f'Passed! Took: {time.time() - t0:.4f} seconds')
    return TestOutputs(train_out, run_out)
def main(configs: dict[str, Any]):
    """Main method for training, then optionally running inference.

    When ``configs['discrete_beta']`` is truthy, builds a step-wise beta
    schedule that holds each integer beta in [beta_init, beta_final] fixed
    for an equal share of the training steps.

    Args:
        configs: Run configuration; must contain ``beta_init``,
            ``beta_final`` and ``train_steps`` when ``discrete_beta`` is set.
    """
    # tf.keras.backend.set_floatx('float32')
    import numpy as np
    custom_betas = None
    if configs.get('discrete_beta', False):
        b0 = configs.get('beta_init', None)   # type: float
        b1 = configs.get('beta_final', None)  # type: float
        # Steps spent at each integer beta value; (b1 + 1 - b0) is the
        # number of distinct betas produced by the range() below.
        per_step = int(configs.get('train_steps', None) // (b1 + 1 - b0))
        custom_betas = []
        for b in range(int(b0), int(b1 + 1)):
            custom_betas.append(b * np.ones(per_step))
        custom_betas = np.stack(np.array(custom_betas))
        custom_betas = tf.convert_to_tensor(custom_betas.flatten(),
                                            dtype=tf.keras.backend.floatx())
        logger.info('Using discrete betas!!!')
        logger.info(f'custom_betas: {custom_betas}')
    # -- Train model ----------------------------------------------------
    train_out = train(configs=configs, make_plots=True,
                      custom_betas=custom_betas)
    x = train_out.x
    dynamics = train_out.dynamics
    configs = train_out.configs
    # ------------------------------------------------------------------
    # -- Run inference on trained model ---------------------------------
    run_steps = configs.get('run_steps', 20000)
    if run_steps > 0:
        # Fresh random start in (-pi, pi) instead of the final train state.
        x = tf.random.uniform(x.shape, *(-PI, PI))
        beta = configs.get('beta_final')
        nchains = configs.get('num_chains', configs.get('nchains', None))
        if nchains is not None:
            x = x[:nchains]
        _ = run(dynamics, configs, x, beta=beta, make_plots=True,
                therm_frac=0.1, num_chains=nchains, save_x=False)
def main(configs: dict[str, Any], **kwargs):
    """Train a model and, on rank 0, run inference from the trained state.

    Args:
        configs: Run configuration, forwarded to ``train`` and ``run``.
        **kwargs: Extra keyword arguments forwarded to ``train``.

    Returns:
        TestOutputs(train_out, run_out); ``run_out`` is None on ranks != 0.
    """
    start = time.time()
    train_out = train(configs, **kwargs)
    run_out = None
    if RANK == 0:
        inference_dir = os.path.join(train_out.logdir, 'inference')
        run_out = run(train_out.dynamics, configs,
                      make_plots=True, runs_dir=inference_dir)
    logger.info(f'Passed! Took: {time.time() - start:.4f} seconds')
    return TestOutputs(train_out, run_out)
def test_single_network(flags: AttrDict):
    """Test training on a single (shared) network.

    Sets ``separate_networks = False``, trains, then runs inference.

    Args:
        flags: Run configuration; mutated in place.

    Returns:
        AttrDict with the final state, flags, dynamics and data containers.
    """
    flags.dynamics_config.separate_networks = False
    x, dynamics, train_data, flags = train(flags)
    # (removed unused local `beta`; `run` reads beta from flags itself)
    dynamics, run_data, x = run(dynamics, flags, x=x)
    return AttrDict({
        'x': x,
        'flags': flags,
        'log_dir': flags.log_dir,
        'dynamics': dynamics,
        'run_data': run_data,
        'train_data': train_data,
    })
def test_resume_training(log_dir: str):
    """Test restoring a training session from a checkpoint.

    Loads the saved FLAGS from ``log_dir``, extends the training run, then
    runs inference from the restored state.

    Args:
        log_dir: Directory of a previous run containing
            ``training/FLAGS.z``.

    Returns:
        AttrDict with the final state, flags, dynamics and data containers.
    """
    flags = AttrDict(
        dict(io.loadz(os.path.join(log_dir, 'training', 'FLAGS.z'))))
    flags.log_dir = log_dir
    # NOTE(review): `flags.get('train_steps', 10)` returns the *existing*
    # train_steps (the key is present after loading FLAGS.z), so this line
    # doubles the step count rather than adding 10 extra steps — confirm
    # that doubling is the intent.
    flags.train_steps += flags.get('train_steps', 10)
    x, dynamics, train_data, flags = train(flags)
    # (removed unused local `beta`; `run` reads beta from flags itself)
    dynamics, run_data, x = run(dynamics, flags, x=x)
    return AttrDict({
        'x': x,
        'flags': flags,
        'log_dir': flags.log_dir,
        'dynamics': dynamics,
        'run_data': run_data,
        'train_data': train_data,
    })
def test_separate_networks(flags: AttrDict):
    """Test training on separate networks.

    Disables HMC warmup, creates a fresh log dir, sets
    ``separate_networks = True`` and disables graph compilation, then
    trains and runs inference.

    Args:
        flags: Run configuration; mutated in place.

    Returns:
        AttrDict with the final state, flags, dynamics and data containers.
    """
    flags.hmc_steps = 0
    flags.log_dir = io.make_log_dir(flags, 'GaugeModel', LOG_FILE)
    flags.dynamics_config.separate_networks = True
    flags.compile = False
    x, dynamics, train_data, flags = train(flags)
    # (removed unused local `beta`; `run` reads beta from flags itself)
    dynamics, run_data, x = run(dynamics, flags, x=x)
    return AttrDict({
        'x': x,
        'flags': flags,
        'log_dir': flags.log_dir,
        'dynamics': dynamics,
        'run_data': run_data,
        'train_data': train_data,
    })
def testing(nn_model, nb_photon, nb_epoch, lr, batch_size, GPU=False):
    """
    Train ``nn_model`` for ``nb_epoch`` epochs and track its performance.

    :param nn_model: a PyTorch model
    :param nb_photon: dataset selector forwarded to ``preprocess``
    :param nb_epoch: number of training epochs
    :param lr: Adam learning rate
    :param batch_size: batch size forwarded to ``preprocess``
    :param GPU: a boolean flag that enables some cuda features from the
        PyTorch library
    :return: (best validation precision, accuracy loader, per-epoch training
        losses, per-epoch validation losses, per-epoch validation accuracies)
    """
    # Adam with weight decay; LR is reduced when validation loss plateaus.
    optimizer = optim.Adam(nn_model.parameters(), lr=lr, weight_decay=0.05)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min')
    train_loader, validation_loader, accuracy_loader = preprocess(
        nb_photon, batch_size)

    best_precision = 0
    training_losses, validation_losses, accuracies = [], [], []
    for epoch in tqdm(range(1, nb_epoch + 1)):
        # One optimization pass over the training set.
        nn_model = train(nn_model, train_loader, optimizer, GPU)

        # Metrics on the training set.
        train_precision, loss_training = get_accuracy(
            nn_model, train_loader, GPU)
        training_losses.append(loss_training)

        # Metrics on the validation set.
        precision, loss_validation = get_accuracy(
            nn_model, validation_loader, GPU)
        validation_losses.append(loss_validation)
        accuracies.append(precision)
        best_precision = max(best_precision, precision)

        # Let the scheduler react to the validation loss.
        scheduler.step(loss_validation)

    return (best_precision, accuracy_loader, training_losses,
            validation_losses, accuracies)
def test_conv_net(
        configs: dict[str, Any],
        make_plots: bool = True,
        **kwargs,
) -> TestOutputs:
    """Test convolutional networks.

    Enables ``use_conv_net`` in the dynamics config, trains, then (on
    rank 0 only) runs inference from the trained state.

    Args:
        configs: Base configuration dict; deep-copied, never mutated.
        make_plots: Whether to generate diagnostic plots.
        **kwargs: Forwarded to ``train``.

    Returns:
        TestOutputs(train_out, run_out); ``run_out`` is None on ranks != 0.
    """
    start = time.time()
    logger.info(f'Testing convolutional network')
    cfgs = AttrDict(**dict(copy.deepcopy(configs)))
    cfgs['dynamics_config']['use_conv_net'] = True
    train_out = train(cfgs, make_plots=make_plots,
                      num_chains=4, verbose=False, **kwargs)
    run_out = None
    if RANK == 0:
        inference_dir = os.path.join(train_out.logdir, 'inference')
        run_out = run(train_out.dynamics, cfgs, x=train_out.x,
                      runs_dir=inference_dir, make_plots=make_plots)
    logger.info(f'Passed! Took: {time.time() - start:.4f} seconds')
    return TestOutputs(train_out, run_out)
def test_conv_net(flags: AttrDict):
    """Test convolutional networks.

    Enables ``use_conv_net``, attaches a small two-layer conv config sized
    from the lattice shape, trains, and runs inference.

    Args:
        flags: Run configuration; mutated in place.

    Returns:
        AttrDict with the final state, flags, dynamics and data containers.
    """
    flags['dynamics_config']['use_conv_net'] = True
    # Input shape drops the leading (batch) dimension of lattice_shape.
    flags.conv_config = ConvolutionConfig(
        sizes=[2, 2],
        filters=[16, 32],
        pool_sizes=[2, 2],
        use_batch_norm=True,
        conv_paddings=['valid', 'valid'],
        conv_activations=['relu', 'relu'],
        input_shape=flags['dynamics_config']['lattice_shape'][1:],
    )
    x, dynamics, train_data, flags = train(flags)
    dynamics, run_data, x = run(dynamics, flags, x=x)
    results = {
        'x': x,
        'flags': flags,
        'log_dir': flags.log_dir,
        'dynamics': dynamics,
        'run_data': run_data,
        'train_data': train_data,
    }
    return AttrDict(results)
# Total timestep count: S_prime timesteps per sample, over all epochs.
# NOTE(review): `S` is not used in this fragment — presumably consumed later.
S_prime = training_sequence.shape[-1]
S = epochs * S_prime

### Run training
# Create the spiking network; topology and basis-filter parameters come
# from the surrounding script (defined outside this fragment).
network = SNNetwork(n_input_neurons,
                    n_hidden_neurons,
                    n_output_neurons,
                    topology,
                    n_basis_feedforward=num_basis_feedforward,
                    feedforward_filter=feedforward_filter,
                    n_basis_feedback=num_basis_feedback,
                    feedback_filter=feedback_filter,
                    tau_ff=10,
                    tau_fb=10,
                    mu=mu,
                    weights_magnitude=0.01)

# Train it (in place; `t0` is assumed to have been set before this fragment).
train(network, training_sequence, learning_rate, kappa, deltas, r, alpha)
print('Number of samples trained on: %d, time: %f' % (epochs,
                                                      time.time() - t0))

### Test accuracy
# The last 100 samples of each class are kept for test
# (indices 900-999 and 1900-1999, truncated to epochs_test each).
test_indices = np.hstack(
    (np.arange(900, 1000)[:epochs_test], np.arange(1900, 2000)[:epochs_test]))

acc, loss = get_acc_and_loss(network, dataset, test_indices)
print('Final test accuracy: %f' % acc)
def main(args):
    """Main method for training (optionally restoring from a checkpoint),
    then running inference and an HMC baseline.

    Args:
        args: AttrDict-like run configuration. ``log_dir`` selects
            restore-vs-fresh; ``hmc_steps`` > 0 triggers an HMC warmup on a
            fresh run; ``run_steps`` > 0 triggers post-training inference.
    """
    hmc_steps = args.get('hmc_steps', 0)
    tf.keras.backend.set_floatx('float32')
    log_file = os.path.join(os.getcwd(), 'log_dirs.txt')
    x = None
    log_dir = args.get('log_dir', None)
    beta_init = args.get('beta_init', None)
    beta_final = args.get('beta_final', None)
    if log_dir is not None:  # we want to restore from latest checkpoint
        train_steps = args.get('train_steps', None)
        args = restore_flags(args, os.path.join(args.log_dir, 'training'))
        args.train_steps = train_steps  # use newly passed value
        args.restore = True
        # Explicitly-passed betas override the restored values.
        if beta_init != args.get('beta_init', None):
            args.beta_init = beta_init
        if beta_final != args.get('beta_final', None):
            args.beta_final = beta_final
        # (removed a second, redundant `args.train_steps = train_steps`:
        # train_steps is not modified between the two assignments)
    else:  # New training session
        timestamps = AttrDict({
            'month': io.get_timestamp('%Y_%m'),
            'time': io.get_timestamp('%Y-%M-%d-%H%M%S'),
            'hour': io.get_timestamp('%Y-%m-%d-%H'),
            'minute': io.get_timestamp('%Y-%m-%d-%H%M'),
            'second': io.get_timestamp('%Y-%m-%d-%H%M%S'),
        })
        args.log_dir = io.make_log_dir(args, 'GaugeModel', log_file,
                                       timestamps=timestamps)
        io.write(f'{args.log_dir}', log_file, 'a')
        args.restore = False
        if hmc_steps > 0:
            # Warm up with HMC to get a reasonable initial state / eps.
            x, _, eps = train_hmc(args)
            args.dynamics_config['eps'] = eps

    # If the dynamics config points at a previous run, reuse its saved eps.
    dynamics_config = args.get('dynamics_config', None)
    if dynamics_config is not None:
        log_dir = dynamics_config.get('log_dir', None)
        if log_dir is not None:
            eps_file = os.path.join(log_dir, 'training', 'models', 'eps.z')
            if os.path.isfile(eps_file):
                io.log(f'Loading eps from: {eps_file}')
                eps = io.loadz(eps_file)
                args.dynamics_config['eps'] = eps

    _, dynamics, _, args = train(args, x=x)

    # ==== Run inference on trained model
    if args.get('run_steps', 5000) > 0:
        # ==== Run with random start
        dynamics, _, _ = run(dynamics, args)
        # ==== Run HMC baseline with a fixed step size
        args.hmc = True
        args.dynamics_config['eps'] = 0.15
        hmc_dir = os.path.join(args.log_dir, 'inference_hmc')
        _ = run_hmc(args=args, hmc_dir=hmc_dir)
# Total timestep count (S_prime per sample, over all epochs).
# NOTE(review): `S` is not used in this fragment — presumably consumed later.
S = epochs * S_prime

# Repeat the whole train/test cycle num_ite times (independent runs).
for _ in range(num_ite):
    ### Run training
    t0 = time.time()

    # Create a fresh network for this iteration.
    network = SNNetwork(**utils.training_utils.make_network_parameters(
        n_input_neurons,
        n_output_neurons,
        n_hidden_neurons,
        topology_type=args.topology_type))

    # Train it (in place).
    train(network, input_train, output_train, indices, learning_rate, kappa,
          deltas, alpha, r)
    print('Number of samples trained on: %d, time: %f' %
          (epochs, time.time() - t0))

    ### Test accuracy
    # Sample epochs_test distinct test examples, then shuffle their order.
    test_indices = np.random.choice(np.arange(input_test.shape[0]),
                                    [epochs_test], replace=False)
    np.random.shuffle(test_indices)

    acc, loss = get_acc_and_loss(network, input_test[test_indices],
                                 output_test[test_indices])
    test_accs.append(acc)
    print('Final test accuracy: %f' % acc)
# Held-out test set: one sample per batch, order preserved for evaluation.
test_loader = torch.utils.data.DataLoader(test_cilia,
                                          batch_size=1,
                                          shuffle=False)
print("Loaded testing set!")

# FC-DenseNet103 segmenting 3 classes from single-channel input.
# NOTE(review): .cuda() assumes a GPU is available — confirm.
model = tiramisu.FCDenseNet103(n_classes=3, in_channels=1).cuda()
model.apply(training_utils.weights_init)
optimizer = torch.optim.Adam(model.parameters(), lr=LR, weight_decay=1e-4)
# NLLLoss expects log-probabilities (model presumably ends in log_softmax —
# verify against the model definition).
criterion = nn.NLLLoss().cuda()

# Training process
for epoch in range(1, N_EPOCHS + 1):
    since = time.time()

    ### Train ###
    trn_loss, trn_err = training_utils.train(model, train_loader, optimizer,
                                             criterion, epoch)
    print('Epoch {:d}\nTrain - Loss: {:.4f}, Acc: {:.4f}'.format(
        epoch, trn_loss, 1 - trn_err))
    time_elapsed = time.time() - since
    print('Train Time {:.0f}m {:.0f}s'.format(time_elapsed // 60,
                                              time_elapsed % 60))

    ### Test ###
    # Despite the name, this evaluates on the validation loader.
    val_loss, val_err = training_utils.test(model, val_loader, criterion,
                                            epoch)
    print('Val - Loss: {:.4f} | Acc: {:.4f}'.format(val_loss, 1 - val_err))
    time_elapsed = time.time() - since
    print('Total Time {:.0f}m {:.0f}s\n'.format(time_elapsed // 60,
                                                time_elapsed % 60))

    ### Checkpoint ###
    training_utils.save_weights(model, epoch, val_loss, val_err)

    ### Adjust Lr ###
def run(rank, size):
    """Per-process entry point for federated SNN training over torch.distributed.

    Rank 0 acts as the coordinator: it samples train/test index splits and
    sends them to ranks 1-3. Every non-zero rank receives its split, trains
    a local SNNetwork on its shard, and reports test accuracy.

    NOTE(review): the topology hard-codes exactly 3 workers (dst=1..3) and
    `size` is unused — confirm this matches the launcher's world size.
    """
    # Receive buffers for the index splits (filled via dist.recv on workers).
    train_indices = torch.zeros([3, 3000], dtype=torch.long)
    test_indices = torch.zeros([3, 333], dtype=torch.long)

    local_data_path = '/home/cream/Desktop/arafin_experiments/SOCC/FL-SNN/data/'
    save_path = os.getcwd() + r'/results'
    datasets = {'mnist_dvs_10': r'mnist_dvs_25ms_26pxl_10_digits.hdf5'}
    dataset = local_data_path + datasets['mnist_dvs_10']

    # Load the full dataset on every rank; shards are selected by index later.
    input_train = torch.FloatTensor(
        tables.open_file(dataset).root.train.data[:])
    output_train = torch.FloatTensor(
        tables.open_file(dataset).root.train.label[:])
    input_test = torch.FloatTensor(tables.open_file(dataset).root.test.data[:])
    output_test = torch.FloatTensor(
        tables.open_file(dataset).root.test.label[:])

    ### Network parameters
    n_input_neurons = input_train.shape[1]
    n_output_neurons = output_train.shape[1]
    n_hidden_neurons = 16
    epochs = input_train.shape[0]
    epochs_test = input_test.shape[0]
    learning_rate = 0.005 / n_hidden_neurons
    kappa = 0.2
    alpha = 1
    deltas = 1
    num_ite = 1
    r = 0.3
    weights_magnitude = 0.05
    task = 'supervised'
    # NOTE(review): trailing comma makes this a 1-tuple ('train',) — likely a
    # typo; unused in this function, so it is harmless here.
    mode = 'train',
    tau_ff = 10
    tau_fb = 10
    # NOTE(review): trailing comma makes this the tuple (1.5,) — likely a
    # typo; also unused in this function.
    mu = 1.5,
    n_basis_feedforward = 8
    feedforward_filter = filters.raised_cosine_pillow_08
    feedback_filter = filters.raised_cosine_pillow_08
    n_basis_feedback = 1

    # Fully-connected topology over (hidden + output) x (all) neurons, with
    # the self-connection diagonal (offset by n_input_neurons) zeroed out.
    topology = torch.ones([
        n_hidden_neurons + n_output_neurons,
        n_input_neurons + n_hidden_neurons + n_output_neurons
    ],
                          dtype=torch.float)
    topology[[i for i in range(n_output_neurons + n_hidden_neurons)], [
        i + n_input_neurons
        for i in range(n_output_neurons + n_hidden_neurons)
    ]] = 0
    # Sanity check: every input neuron connects to every non-input neuron.
    assert torch.sum(topology[:, :n_input_neurons]) == (
        n_input_neurons * (n_hidden_neurons + n_output_neurons))
    print(topology[:, n_input_neurons:])

    if rank == 0:
        # Coordinator: sample disjoint index splits for the 3 workers.
        train_indicess = torch.tensor(np.random.choice(np.arange(
            input_train.shape[0]), [3, 3000],
                                                       replace=False),
                                      dtype=torch.long)
        test_indicess = torch.tensor(np.random.choice(np.arange(
            input_test.shape[0]), [3, 333],
                                                      replace=False),
                                     dtype=torch.long)
        dist.send(tensor=train_indicess, dst=1)
        dist.send(tensor=train_indicess, dst=2)
        dist.send(tensor=train_indicess, dst=3)
    else:
        dist.recv(tensor=train_indices, src=0)
    dist.barrier()

    if rank == 0:
        dist.send(tensor=test_indicess, dst=1)
        dist.send(tensor=test_indicess, dst=2)
        dist.send(tensor=test_indicess, dst=3)
    else:
        dist.recv(tensor=test_indices, src=0)
    dist.barrier()

    if rank != 0:
        # Worker: select this rank's shard (row rank-1 of the splits).
        training_data = input_train[train_indices[rank - 1, :]]
        training_label = output_train[train_indices[rank - 1, :]]
        test_data = input_test[test_indices[rank - 1, :]]
        test_label = output_test[test_indices[rank - 1, :]]
        # Bootstrap sample (with replacement) defining the training order.
        indices = np.random.choice(np.arange(training_data.shape[0]),
                                   [training_data.shape[0]],
                                   replace=True)
        S_prime = training_data.shape[-1]
        S = epochs * S_prime
        num_ite = 1
        test_accs = []
        for _ in range(num_ite):
            ### Run training
            t0 = time.time()
            # Create a fresh network for this iteration.
            network = SNNetwork(**utils.training_utils.make_network_parameters(
                n_input_neurons,
                n_output_neurons,
                n_hidden_neurons,
                topology_type='fully_connected'))
            # Train it (in place).
            train(network, training_data, training_label, indices,
                  learning_rate, kappa, deltas, alpha, r)
            print('Number of samples trained on: %d, time: %f' %
                  (epochs, time.time() - t0))

            ### Test accuracy
            test_indx = np.random.choice(np.arange(test_data.shape[0]),
                                         [test_data.shape[0]],
                                         replace=False)
            np.random.shuffle(test_indx)
            acc, loss = get_acc_and_loss(network, test_data[test_indx],
                                         test_label[test_indx])
            test_accs.append(acc)
            print('Final test accuracy: %f' % acc)
def main():
    """Train an SNN on an MNIST-DVS dataset and report test accuracy.

    Reads dataset choice and learning hyperparameters from the module-level
    ``args``; repeats the train/test cycle ``args.num_ite`` times and saves
    the accuracies to ``<cwd>/results``.
    """
    local_data_path = r'mnist-dvs/'
    save_path = os.getcwd() + r'/results'
    datasets = {
        'mnist_dvs_2': r'mnist_dvs_25ms_26pxl_2_digits.hdf5',
        'mnist_dvs_10': r'mnist_dvs_25ms_26pxl_10_digits.hdf5',
    }
    dataset = local_data_path + datasets[args.dataset]

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    # BUGFIX: previously used torch.cuda.FloatTensor unconditionally, which
    # crashes on CPU-only machines even though `device` falls back to 'cpu'.
    # Build on CPU, then move to the selected device.
    input_train = torch.FloatTensor(
        tables.open_file(dataset).root.train.data[:]).to(device)
    output_train = torch.FloatTensor(
        tables.open_file(dataset).root.train.label[:]).to(device)
    input_test = torch.FloatTensor(
        tables.open_file(dataset).root.test.data[:]).to(device)
    output_test = torch.FloatTensor(
        tables.open_file(dataset).root.test.label[:]).to(device)

    ### Network parameters
    n_input_neurons = input_train.shape[1]
    n_output_neurons = output_train.shape[1]
    n_hidden_neurons = 4

    ### Learning parameters
    # Epoch counts default to the full dataset when not given on the CLI.
    if args.epochs:
        epochs = args.epochs
    else:
        epochs = input_train.shape[0]
    if args.epochs_test:
        epochs_test = args.epochs_test
    else:
        epochs_test = input_test.shape[0]
    test_accs = []
    learning_rate = args.lr / n_hidden_neurons
    kappa = args.kappa
    alpha = args.alpha
    deltas = args.deltas
    num_ite = args.num_ite
    r = args.r

    ### Randomly select training samples (with replacement).
    indices = np.random.choice(np.arange(input_train.shape[0]), [epochs],
                               replace=True)

    S_prime = input_train.shape[-1]
    S = epochs * S_prime

    for _ in range(num_ite):
        ### Run training
        t0 = time.time()
        # Create a fresh network for this iteration.
        network = SNNetwork(
            **make_network_parameters(n_input_neurons,
                                      n_output_neurons,
                                      n_hidden_neurons,
                                      topology_type=args.topology_type,
                                      device=device))
        network = network.to(device)

        # Train (in place).
        train(network, input_train, output_train, indices, learning_rate,
              kappa, deltas, alpha, r, device)
        print('Number of samples trained on: %d, time: %f' %
              (epochs, time.time() - t0))

        ### Test accuracy
        test_indices = np.random.choice(np.arange(input_test.shape[0]),
                                        [epochs_test], replace=False)
        np.random.shuffle(test_indices)
        acc, loss = get_acc_and_loss(network, input_test[test_indices],
                                     output_test[test_indices], device)
        test_accs.append(acc)
        print('Final test accuracy: %f' % acc)

    np.save(save_path + '/acc_' + args.dataset + args.topology_type + '.npy',
            test_accs)