def update_model(self):
    # Build the model on the first call, otherwise just reload the latest
    # weights that the training process published into the shared dict.
    print("Updated model")
    if self.model is None:
        self.model = create_model(self.model_shared_dict, device=self.device)
    else:
        self.model.load_state_dict(self.model_shared_dict)
    # Inference only: keep the network in eval mode.
    self.model.eval()
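# create_model(...) is used throughout these snippets but not shown. Below is a
# minimal sketch of what such a helper could look like; the placeholder
# nn.Sequential architecture and the state_size/action_size defaults are
# illustrative assumptions (the project uses its own LinearModel), and the
# dict(...) call only reflects that the state dict may arrive as a
# multiprocessing Manager proxy.
import torch.nn as nn


def create_model(state_dict=None, device="cuda:0", state_size=231, action_size=5):
    # Placeholder architecture; sizes here are illustrative only.
    model = nn.Sequential(nn.Linear(state_size, 128), nn.ReLU(),
                          nn.Linear(128, action_size))
    if state_dict is not None:
        # dict(...) turns a Manager().dict() proxy back into a plain mapping.
        model.load_state_dict(dict(state_dict))
    return model.to(device)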
def __init__(self, sample_queue, model_shared_dict, state_dict_optimizer, save_dir,
             device="cuda:0", episode_length=1000, num_eval_episodes=10,
             save_interval=30):
    super().__init__()
    self.save_interval = save_interval
    # Queue of training samples coming from the buffer process, and the dict
    # through which the latest weights are shared with the inference process.
    self.sample_queue = sample_queue
    self.device = device
    self.model_shared_dict = model_shared_dict
    self.save_dir = save_dir
    # Build the network from the weights currently held in the shared dict.
    self.model = create_model(state_dict=model_shared_dict, device=device)
    self.episode_length = episode_length
    self.num_eval_episodes = num_eval_episodes
    self.state_dict_optimizer = state_dict_optimizer
    # Training hyperparameters.
    self.gamma = 0.99
    self.steps = 10_000
    self.shutdown = False
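# The constructor above only wires the trainer to its queues and shared dict.
# The following run() method is a sketch of the loop those attributes imply,
# not the project's actual implementation: _compute_loss() is a hypothetical
# helper (e.g. a DQN TD-error loss using self.gamma), save_interval is treated
# here as seconds, and torch, time and os are assumed to be imported at module
# level.
def run(self):
    optimizer = torch.optim.Adam(self.model.parameters())
    if self.state_dict_optimizer is not None:
        optimizer.load_state_dict(self.state_dict_optimizer)
    step = 0
    last_save = time.time()
    while not self.shutdown and step < self.steps:
        batch = self.sample_queue.get()   # blocks until the buffer server sends samples
        loss = self._compute_loss(batch)  # hypothetical helper, project-specific
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        step += 1
        # Publish a CPU copy of the weights so the inference process can reload them.
        self.model_shared_dict.update(
            {k: v.cpu() for k, v in self.model.state_dict().items()})
        # Periodically checkpoint model and optimizer state to save_dir.
        if time.time() - last_save > self.save_interval:
            torch.save({"model": self.model.state_dict(),
                        "optimizer": optimizer.state_dict()},
                       os.path.join(self.save_dir, f"checkpoint_{step}.pt"))
            last_save = time.time()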
device0 = device("cuda:0")
device1 = device("cuda:1")
device2 = device("cuda:2")

# Setting up communication channels
buffer_queue = Queue()
sample_queue = Queue()
model_shared_dict = Manager().dict()

# Load checkpoint, if specified
checkpoint, state_dict_model, state_dict_optimizer = None, None, None
if args.load:
    checkpoint = load(args.load)
    state_dict_model = checkpoint.get("model")
    state_dict_optimizer = checkpoint.get("optimizer")

# Publish the (CPU) weights of the initial model so every process starts
# from the same parameters.
model = create_model(state_dict_model)
model.cpu()
model_shared_dict.update(model.state_dict())

# Create the main processes and hand them their communication channels
main_train_process = ModelTrainServer(
    sample_queue, model_shared_dict,
    state_dict_optimizer=state_dict_optimizer,
    save_dir=args.save_dir,
    device=device0,
    episode_length=args.episode_length)
main_buffer_process = BufferServer(buffer_queue, sample_queue,
                                   batch_size=args.t_batch_size)
main_inference_process = ModelInferenceServer(
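# Standalone sketch of the weight-sharing pattern the script above relies on:
# one process publishes CPU tensors into a Manager().dict(), and another
# process rebuilds a model from it. The names make_net, publisher and consumer
# are illustrative, not from the project.
from multiprocessing import Manager, Process

import torch.nn as nn


def make_net():
    return nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 2))


def publisher(shared):
    net = make_net()
    # Tensors must live on the CPU before being placed in the proxy dict.
    shared.update({k: v.cpu() for k, v in net.state_dict().items()})


def consumer(shared):
    net = make_net()
    net.load_state_dict(dict(shared))
    net.eval()
    print("consumer rebuilt the model from the shared dict")


if __name__ == "__main__":
    shared = Manager().dict()
    p = Process(target=publisher, args=(shared,))
    p.start()
    p.join()
    c = Process(target=consumer, args=(shared,))
    c.start()
    c.join()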
#####################################################################
# Define your custom controller
#
# which can take an observation and the number of agents, and compute
# the necessary action for this step for all (or even some) of the
# agents (a sketch of such a controller follows after this excerpt)
#####################################################################
# Calculate the state size given the depth of the tree observation and
# the number of features per node
if DQN_MODEL:
    n_features_per_node = obs_builder.observation_dim
    n_nodes = sum([np.power(4, i) for i in range(obs_tree_depth + 1)])
    state_size = n_features_per_node * n_nodes

    model = LinearModel(device, state_size, [], 5)
    checkpoint = load(CHECKPOINT_PATH, map_location=device)
    state_dict_model = checkpoint.get("model")
    model = create_model(state_dict=state_dict_model, device=device)
    controller = create_controller(model=model)
    print("Created model")
else:
    controller = WaitIfOccupiedAnywhereController()

#####################################################################
# Main evaluation loop
#
# This iterates over an arbitrary number of env evaluations
#####################################################################
evaluation_number = 0
print("Starting evaluation")
while True:
    evaluation_number += 1
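# Sketch of a controller matching the interface described in the banner above:
# it takes the per-agent observations and the number of agents and returns an
# action for each agent. The class name, the act() method, and the greedy
# argmax policy are illustrative assumptions (create_controller() in the
# project may differ), and each observation is assumed to already be flattened
# into a feature vector of length state_size.
import torch


class GreedyModelController:
    def __init__(self, model, device="cpu"):
        self.model = model
        self.device = device

    def act(self, observations, n_agents):
        actions = {}
        with torch.no_grad():
            for handle in range(n_agents):
                obs = observations.get(handle)
                if obs is None:
                    continue  # agent has no observation this step; leave its action unset
                x = torch.as_tensor(obs, dtype=torch.float32, device=self.device)
                q_values = self.model(x.unsqueeze(0))
                # The model has 5 outputs, one per rail action; pick the greedy one.
                actions[handle] = int(q_values.argmax(dim=1).item())
        return actions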