if __name__ == "__main__":
    # Smoke-test / visualization driver for ThalNetModel: builds a model from a
    # hand-written parameter dict and feeds it random MNIST-shaped input.
    # NOTE(review): this chunk appears truncated — the loop body ends at
    # `y = x`; the actual forward pass presumably follows. Confirm against
    # the full file.
    input_size = 28  # width/height of the square input (MNIST-sized).
    params_dict = {
        'context_input_size': 32,
        'input_size': input_size,
        'output_size': 10,  # number of classes.
        'center_size': 1,
        'center_size_per_module': 32,
        'num_modules': 4
    }
    # Initialize the application state singleton.
    from utils.app_state import AppState
    app_state = AppState()
    app_state.visualize = True  # enable plotting for this manual run.
    from utils.param_interface import ParamInterface
    params = ParamInterface()
    params.add_custom_params(params_dict)
    model = ThalNetModel(params)
    seq_length = 10
    batch_size = 2
    # Check for different seq_lengths and batch_sizes.
    # NOTE(review): range(1) runs a single iteration — presumably a
    # placeholder for a sweep over sequence lengths / batch sizes.
    for i in range(1):
        # Create random Tensors to hold inputs and outputs
        x = torch.randn(batch_size, 1, input_size, input_size)
        logits = torch.randn(batch_size, 1, params_dict['output_size'])
        y = x  # target equals input here — autoencoder-style check? TODO confirm.
# Main training and verification loop. for data_tuple, aux_tuple in problem.return_generator(): # apply curriculum learning - change problem max seq_length curric_done = problem.curriculum_learning_update_params(episode) # reset gradients optimizer.zero_grad() # Check visualization flag - turn on when we wanted to visualize (at # least) validation. if FLAGS.visualize is not None and FLAGS.visualize <= 1: AppState().visualize = True else: app_state.visualize = False # Turn on training mode. model.train() # 1. Perform forward step, calculate logits and loss. logits, loss = forward_step(model, problem, episode, stat_col, data_tuple, aux_tuple) if not use_validation_problem: # Store the calculated loss on a list. last_losses.append(loss) # Truncate list length. if len(last_losses) > loss_length: last_losses.popleft() # 2. Backward gradient flow.