def _evaluate_genome_parallel(genome: Genome, loss, beta_type, problem_type, is_testing,
                              batch_size=10000, n_samples=10, is_gpu=False):
    '''
    Evaluate one genome with a single full-batch forward pass and return the loss value.

    Calculates: KL-Div(q(w)||p(w|D))
    `loss` is called with y_pred, y_true, kl_qw_pw and beta
    (the VariationalInferenceLoss interface, not the alternative).

    The data is NOT passed in: it is read from the module-level `dataset`.
    `dataset.x_test`/`dataset.y_test` are used when `is_testing` is truthy,
    `dataset.x_train`/`dataset.y_train` otherwise.

    :param genome: genome used to build the ComplexStochasticNetwork
    :param loss: callable returning an object with `.item()`
    :param beta_type: forwarded to `get_beta`
    :param problem_type: forwarded to the batch/output helpers
    :param is_testing: selects the test split (truthy) or the train split (falsy)
    :param batch_size: only used to derive `m`, the batch count given to `get_beta`
    :param n_samples: forwarded to the batch/output helpers
    :param is_gpu: when truthy the network is moved with `.cuda()`
    :return: `loss(...).item()`
    '''
    # Build the network in evaluation mode.
    net = ComplexStochasticNetwork(genome=genome)
    if is_gpu:
        net.cuda()
    net.eval()

    # Batch count handed to get_beta; derived from the full dataset length.
    n_batches = math.ceil(len(dataset.x) / batch_size)

    # Pick the split used for the data log-likelihood (p(y*|x*,D)).
    chosen_split = ((dataset.x_test, dataset.y_test) if is_testing
                    else (dataset.x_train, dataset.y_train))
    inputs, targets = _prepare_batch_data(x_batch=chosen_split[0],
                                          y_batch=chosen_split[1],
                                          problem_type=problem_type,
                                          is_gpu=is_gpu,
                                          n_input=genome.n_input,
                                          n_output=genome.n_output,
                                          n_samples=n_samples)

    with torch.no_grad():
        # Forward pass: here the KL term comes from the network itself.
        raw_output, kl_qw_pw = net(inputs)
        predictions, _, targets = _process_output_data(raw_output,
                                                       y_true=targets,
                                                       n_samples=n_samples,
                                                       n_output=genome.n_output,
                                                       problem_type=problem_type,
                                                       is_pass=True)
        beta = get_beta(beta_type=beta_type, m=n_batches, batch_idx=0, epoch=1, n_epochs=1)
        kl_posterior = loss(y_pred=predictions, y_true=targets, kl_qw_pw=kl_qw_pw, beta=beta)

    return kl_posterior.item()
def evaluate_genome(genome: Genome, loss, beta_type, problem_type,
                    batch_size=10000, n_samples=10, is_gpu=False):
    '''
    Evaluate one genome on the whole module-level `dataset` and return the loss value.

    Calculates: KL-Div(q(w)||p(w|D))
    `loss` is called with y_pred, y_true, kl_qw_pw and beta
    (the VariationalInferenceLoss interface, not the alternative).

    The KL term is the one computed from the genome by `compute_kl_qw_pw`;
    the KL value returned by the network's forward pass is discarded.
    Inputs and targets are `dataset.x` and `dataset.y`.

    :param genome: genome used to build the ComplexStochasticNetwork
    :param loss: callable whose result supports `0 + result` and `.item()`
    :param beta_type: forwarded to `get_beta`
    :param problem_type: forwarded to `_prepare_batch_data`
    :param batch_size: only used to derive `m`, the batch count given to `get_beta`
    :param n_samples: forwarded to `_prepare_batch_data`
    :param is_gpu: when truthy the network and both tensors are moved with `.cuda()`
    :return: accumulated loss converted with `.item()`
    '''
    accumulated = 0
    kl_qw_pw = compute_kl_qw_pw(genome=genome)

    # Build the network in evaluation mode.
    net = ComplexStochasticNetwork(genome=genome)
    if is_gpu:
        net.cuda()
    n_batches = math.ceil(len(dataset.x) / batch_size)
    net.eval()

    # Data used for the data log-likelihood (p(y*|x*,D)).
    inputs, targets = _prepare_batch_data(x_batch=dataset.x,
                                          y_batch=dataset.y,
                                          problem_type=problem_type,
                                          is_gpu=is_gpu,
                                          n_input=genome.n_input,
                                          n_output=genome.n_output,
                                          n_samples=n_samples)
    if is_gpu:
        inputs = inputs.cuda()
        targets = targets.cuda()

    with torch.no_grad():
        # Forward pass; only the predictions are kept.
        predictions, _ = net(inputs)
        beta = get_beta(beta_type=beta_type, m=n_batches, batch_idx=0, epoch=1, n_epochs=1)
        accumulated += loss(y_pred=predictions, y_true=targets, kl_qw_pw=kl_qw_pw, beta=beta)

    return accumulated.item()
def test_network_structure_miso_2(self):
    '''
    Check the layers built for a 2-input / 1-hidden / 2-output genome and its forward pass.

    Connections (source, target) and weights:
        (-1, 2)=1.0  (-2, 2)=2.0  (2, 0)=3.0  (2, 1)=4.0
        (-1, 0)=0    (-1, 1)=1.0  (-2, 1)=0   (-2, 0)=1.0

    Asserts the input keys and weight means of both layers, then that an input
    of [1.0, 1.0] yields [10.0, 13.0].
    NOTE(review): those expected outputs match a purely linear pass
    (3*3 + 1 = 10 and 4*3 + 1 = 13); presumably the test config uses identity
    activations and zero biases - confirm against the fixture.
    '''
    self.config.n_output = 2
    # Use the GPU when one is present and fall back to the CPU otherwise, so the
    # structural checks still run on CUDA-less machines instead of erroring.
    is_cuda = torch.cuda.is_available()

    genome = generate_genome_given_graph(
        graph=((-1, 2), (-2, 2), (2, 0), (2, 1), (-1, 0), (-1, 1), (-2, 1), (-2, 0)),
        connection_weights=(1.0, 2.0, 3.0, 4.0, 0, 1.0, 0, 1.0))

    n_samples = 1
    input_data = torch.tensor([[1.0, 1.0]])
    input_data = input_data.view(-1, genome.n_input).repeat(n_samples, 1)

    model = ComplexStochasticNetwork(genome=genome, is_trainable=True, is_cuda=is_cuda)

    # Layer 0: both outputs, fed by the hidden node and the two inputs.
    self.assertEqual(model.layers[0].input_keys, [2, -2, -1])
    self.assertTrue(
        torch.allclose(model.layers[0].weight_mean,
                       torch.tensor([[3.0, 1.0, 0.0], [4.0, 0.0, 1.0]]),
                       atol=1e-02))

    # Layer 1: the hidden node, fed by the two inputs.
    self.assertEqual(model.layers[1].input_keys, [-2, -1])
    self.assertTrue(
        torch.allclose(model.layers[1].weight_mean,
                       torch.tensor([[2.0, 1.0]]),
                       atol=1e-02))

    if is_cuda:
        input_data = input_data.cuda()
        model.cuda()

    y, _ = model(input_data)
    expected_y = torch.tensor([[10.0, 13.0]])
    if is_cuda:
        # Bring the prediction back so it can be compared with the CPU tensor.
        y = y.cpu()
    self.assertTrue(torch.allclose(y, expected_y, atol=1e-02))
def evaluate_genome_with_dataloader(genome: Genome, data_loader, loss, beta_type, problem_type,
                                    batch_size=10000, n_samples=10, is_gpu=False,
                                    return_all=False):
    '''
    Evaluate one genome over every batch yielded by `data_loader`, summing the loss.

    Calculates: KL-Div(q(w)||p(w|D))
    `loss` is called with y_pred, y_true, kl_qw_pw and beta
    (the VariationalInferenceLoss interface, not the alternative).

    The KL term is the one computed from the genome by `compute_kl_qw_pw`;
    the KL value returned by the network's forward pass is discarded.

    NOTE(review): `m` is `ceil(len(data_loader) / batch_size)`. If `data_loader`
    is a torch DataLoader, `len()` already counts batches, so this may
    under-count the number of batches - confirm the intended value.

    :param genome: genome used to build the ComplexStochasticNetwork
    :param data_loader: iterable of (x_batch, y_batch) pairs that supports `len()`
    :param loss: callable whose result can be accumulated with `+=` and has `.item()`
    :param beta_type: forwarded to `get_beta`
    :param problem_type: forwarded to `_prepare_batch_data`
    :param batch_size: only used to derive `m`
    :param n_samples: forwarded to `_prepare_batch_data`
    :param is_gpu: when truthy the network is moved with `.cuda()`
    :param return_all: when truthy also return the concatenated inputs, targets
        and predictions
    :return: `loss_value`, or `(x, y_true, y_pred, loss_value)` when `return_all`
    '''
    running_loss = 0
    kl_qw_pw = compute_kl_qw_pw(genome=genome)

    # Build the network in evaluation mode.
    net = ComplexStochasticNetwork(genome=genome)
    if is_gpu:
        net.cuda()
    n_batches = math.ceil(len(data_loader) / batch_size)
    net.eval()

    seen_inputs = []
    seen_predictions = []
    seen_targets = []

    # Accumulate the data log-likelihood (p(y*|x*,D)) batch by batch.
    for batch_idx, (raw_x, raw_y) in enumerate(data_loader):
        inputs, targets = _prepare_batch_data(x_batch=raw_x,
                                              y_batch=raw_y,
                                              problem_type=problem_type,
                                              is_gpu=is_gpu,
                                              n_input=genome.n_input,
                                              n_output=genome.n_output,
                                              n_samples=n_samples)
        with torch.no_grad():
            # Forward pass; only the predictions are kept.
            predictions, _ = net(inputs)
            beta = get_beta(beta_type=beta_type, m=n_batches, batch_idx=batch_idx,
                            epoch=1, n_epochs=1)
            running_loss += loss(y_pred=predictions, y_true=targets,
                                 kl_qw_pw=kl_qw_pw, beta=beta)
            if return_all:
                seen_inputs.append(inputs)
                seen_predictions.append(predictions)
                seen_targets.append(targets)

    loss_value = running_loss.item()
    if not return_all:
        return loss_value

    x = torch.cat(seen_inputs, dim=0)
    y_pred = torch.cat(seen_predictions, dim=0)
    y_true = torch.cat(seen_targets, dim=0)
    return x, y_true, y_pred, loss_value
class StandardTrainer:
    '''
    Full-batch trainer for a ComplexStochasticNetwork built from a genome.

    `train` optimises the network with Adam on a train split of
    `dataset.x_train`/`dataset.y_train`, evaluates on the held-out validation
    split after every epoch, keeps a copy of the best state dict and stops once
    the validation loss has not improved for more than
    N_EPOCHS_WITHOUT_IMPROVING epochs.
    '''

    def __init__(self, dataset, n_epochs, n_output, problem_type, n_samples, beta, is_cuda,
                 weight_decay=0.0005, lr=0.01):
        '''
        :param dataset: object exposing `x_train` and `y_train` tensors
        :param n_epochs: maximum number of training epochs
        :param n_output: forwarded to `calculate_multinomial`
        :param problem_type: 'classification' or 'regression'
        :param n_samples: forwarded to the batch helpers and `calculate_multinomial`
        :param beta: weight passed to the criterion as `beta`
        :param is_cuda: when truthy, network, criterion and data are moved with `.cuda()`
        :param weight_decay: Adam weight decay
        :param lr: Adam learning rate
        '''
        self.dataset = dataset
        self.is_cuda = is_cuda
        self.lr = lr
        self.weight_decay = weight_decay
        self.n_epochs = n_epochs
        self.n_output = n_output
        self.problem_type = problem_type
        self.n_samples = n_samples
        self.beta = beta

        # Populated by train().
        self.network = None
        self.criterion = None
        self.optimizer = None
        self.final_loss = None

        self._reset_early_stopping()

    def _reset_early_stopping(self):
        '''Forget the best validation loss / checkpoint recorded by a previous run.'''
        self.last_update = 0
        # inf instead of a magic ceiling: any finite validation loss is an
        # improvement, so the first epoch always stores a checkpoint.
        self.best_loss_val = float('inf')
        self.best_network_state = None

    def train(self, genome):
        '''
        Train a fresh network built from `genome`.

        After the call `self.network` holds the trained network,
        `self.final_loss` the training loss of the last executed epoch (None if
        no epoch ran) and `self.best_network_state` the state dict with the
        lowest validation loss.
        '''
        # Each run builds a new network, so the best loss / checkpoint of an
        # earlier run (possibly of another genome) must not leak into this one.
        self._reset_early_stopping()

        kl_qw_pw = compute_kl_qw_pw(genome=genome)

        # setup network
        self.network = ComplexStochasticNetwork(genome=genome, is_trainable=True,
                                                is_cuda=self.is_cuda)
        self.criterion = get_loss(problem_type=self.problem_type)
        if self.is_cuda:
            self.network.cuda()
            self.criterion.cuda()

        self.optimizer = Adam(self.network.parameters(), lr=self.lr,
                              weight_decay=self.weight_decay)

        x_batch, y_batch = self.dataset.x_train, self.dataset.y_train
        x_train, x_val, y_train, y_val = self.train_val_split(
            x_batch, y_batch, problem_type=self.problem_type, val_ratio=0.2)

        # Only the prepared inputs are kept; the targets stay as returned by
        # train_val_split.
        x_train, _ = _prepare_batch_data(x_batch=x_train,
                                         y_batch=y_train,
                                         problem_type=self.problem_type,
                                         is_gpu=False,  # this could be removed
                                         n_input=genome.n_input,
                                         n_output=genome.n_output,
                                         n_samples=self.n_samples)
        x_val, _ = _prepare_batch_data(x_batch=x_val,
                                       y_batch=y_val,
                                       problem_type=self.problem_type,
                                       is_gpu=False,
                                       n_input=genome.n_input,
                                       n_output=genome.n_output,
                                       n_samples=self.n_samples)

        if self.is_cuda:
            x_train = x_train.cuda()
            y_train = y_train.cuda()
            x_val = x_val.cuda()
            y_val = y_val.cuda()

        self.network.train()
        # Defined up front so the summary below also works when n_epochs == 0.
        epoch = None
        loss_epoch = None
        for epoch in range(self.n_epochs):
            loss_epoch = self._train_one(x_train, y_train, kl_qw_pw)
            _, _, _, loss_val = self._evaluate(x_val, y_val, network=self.network)

            if loss_val < self.best_loss_val:
                self.best_loss_val = loss_val
                # deepcopy: state_dict() references the live parameters, which
                # keep changing in later epochs.
                self.best_network_state = copy.deepcopy(self.network.state_dict())
                self.last_update = epoch

            if epoch - self.last_update > N_EPOCHS_WITHOUT_IMPROVING:
                print(f'Breaking training as not improving for {N_EPOCHS_WITHOUT_IMPROVING} epochs')
                break

            if epoch % 200 == 0:
                print(f'Epoch = {epoch}. Training Loss: {loss_epoch}. '
                      f'Best Val. Loss: {self.best_loss_val}')

        self.network.clear_non_existing_weights(clear_grad=False)  # reset non-existing weights
        self.final_loss = loss_epoch
        print(f'Final Epoch = {epoch}. Training Error: {self.final_loss}')

    def _train_one(self, x_batch, y_batch, kl_qw_pw):
        '''
        Run one full-batch optimisation step and return the loss as a Python number.

        NOTE(review): the `kl_qw_pw` argument is immediately replaced by the KL
        value returned by the network, so the value precomputed in `train` is
        not used here - confirm which one is intended (see TODO below).
        '''
        # _evaluate() switches the network to eval mode after every epoch;
        # restore train mode so each step is optimised in train mode.
        self.network.train()

        # TODO: the kl_qw_pw returned by the network gives problems with backprop.
        output, kl_qw_pw = self.network(x_batch)
        output, _ = calculate_multinomial(output, self.n_samples, self.n_output)

        loss = self.criterion(y_pred=output, y_true=y_batch, kl_qw_pw=kl_qw_pw, beta=self.beta)
        loss_epoch = loss.item()

        self.optimizer.zero_grad()
        loss.backward()  # Backward Propagation
        self.optimizer.step()  # Optimizer update
        return loss_epoch

    def _evaluate(self, x_batch, y_batch, network):
        '''
        Evaluate `network` on one batch without tracking gradients.

        Leaves `network` in eval mode.

        :return: (x, y_true, y_pred, loss) where the tensors are `torch.cat`
            copies of the inputs, targets and predictions and `loss` is a
            Python number
        '''
        network.eval()

        with torch.no_grad():
            output, kl_qw_pw = network(x_batch)
            output, _ = calculate_multinomial(output, self.n_samples, self.n_output)
            loss = self.criterion(y_pred=output, y_true=y_batch, kl_qw_pw=kl_qw_pw,
                                  beta=self.beta)
            loss_epoch = loss.item()

        # Single-chunk torch.cat keeps the original return contract (new tensors).
        x = torch.cat([x_batch], dim=0)
        y_pred = torch.cat([output], dim=0)
        y_true = torch.cat([y_batch], dim=0)
        return x, y_true, y_pred, loss_epoch

    def train_val_split(self, x_batch, y_batch, problem_type, val_ratio=0.2):
        '''
        Split tensors into train and validation parts with `train_test_split`.

        Inputs are returned as float tensors. Targets are returned as long
        tensors for 'classification' and float tensors for 'regression'; for
        any other `problem_type` they are left as returned by `train_test_split`.

        :return: (x_train, x_val, y_train, y_val)
        '''
        x_train, x_val, y_train, y_val = train_test_split(x_batch.numpy(), y_batch.numpy(),
                                                          test_size=val_ratio)
        x_train = torch.tensor(x_train).float()
        x_val = torch.tensor(x_val).float()
        if problem_type == 'classification':
            y_train = torch.tensor(y_train).long()
            y_val = torch.tensor(y_val).long()
        elif problem_type == 'regression':
            y_train = torch.tensor(y_train).float()
            y_val = torch.tensor(y_val).float()
        return x_train, x_val, y_train, y_val

    def get_best_network(self):
        '''
        Build a new network from the trained genome and load the best checkpoint.

        Requires a previous `train` call that recorded at least one validation
        improvement; otherwise `best_network_state` is still None.
        '''
        network = ComplexStochasticNetwork(genome=self.network.genome, is_trainable=True)
        network.load_state_dict(self.best_network_state)
        return network