def __init__(self, args, name):
    super().__init__()

    # --------- AGENT ---------------
    self.name = name
    self.check_args(args)
    self.args = args

    # --------- ENVIRONMENT ----------
    self.env = gym.make(self.args.env_name)
    self.current_features_sequence = None  # set in self.reset_env(); holds the encoded sequence if an encoder is enabled

    # --------- ENV STATE ---------------
    self.n_states = self.env.observation_space.shape[0]
    self.states_sequence = deque(maxlen=self.args.n_sequence)
    self.state_min_val = self.env.observation_space.low.min()   # smallest bound across all observation dims
    self.state_max_val = self.env.observation_space.high.max()  # largest bound across all observation dims
    self.n_actions = self.env.action_space.n
    self.epsilon = 1.0  # epsilon-greedy exploration rate; starts fully random

    # --------- MODELS --------------
    if self.args.encoder_type != 'nothing':
        self.feature_extractor = FeatureExtractor(self.args, self.n_states)

    builder = ModelBuilder(self.args, self.n_states, self.n_actions)

    if self.args.encoder_type == 'conv':
        builder.encoder_output_size = self.feature_extractor.encoder_output_size

    self.dqn_model = builder.build_dqn_model()
    self.target_model = builder.build_dqn_model()  # separate target network, synced via update_target()

    if self.args.is_curiosity:
        self.inverse_model = builder.build_inverse_model()
        self.forward_model = builder.build_forward_model()

    # -------- OPTIMIZER AND LOSS ----
    if self.args.is_curiosity:
        # Optimize the curiosity modules, the encoder, and the DQN jointly.
        params = (list(self.inverse_model.parameters())
                  + list(self.feature_extractor.encoder.parameters())
                  + list(self.forward_model.parameters())
                  + list(self.dqn_model.parameters()))
    else:
        params = self.dqn_model.parameters()

    self.optimizer = torch.optim.Adam(params=params, lr=self.args.learning_rate)
    self.dqn_model_loss_fn = nn.MSELoss()
    self.inverse_model_loss_fn = nn.MSELoss()

    # --------- INTERNAL STATE -------
    self.current_episode = 0
    self.total_steps = 0   # across all episodes combined
    self.current_step = 0  # within the current episode only

    self.memory = Memory(capacity=self.args.memory_size, is_per=self.args.is_prioritized)

    # ----- TRAINING BUFFERS --------
    self.loss_dqn = []
    self.ers = []  # episode reward sums

    if self.args.is_curiosity:
        self.loss_inverse = []
        self.cos_distance = []
        self.loss_combined = []

    # ----- EPISODE BUFFERS --------
    self.e_loss_dqn = []
    self.e_reward = []

    if self.args.is_curiosity:
        self.e_loss_inverse = []
        self.e_cos_distance = []
        self.e_loss_combined = []

    self.update_target()  # sync target network weights with the online DQN
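
# --- Usage sketch (illustration only) ---
# A minimal sketch of how this constructor might be driven. The class name
# `Agent` and all concrete values below are assumptions; the field names on
# `args` are exactly the ones the constructor above reads.
if __name__ == '__main__':
    from types import SimpleNamespace

    args = SimpleNamespace(
        env_name='CartPole-v1',   # any gym env with a discrete action space
        n_sequence=4,             # maxlen of the states_sequence deque
        encoder_type='nothing',   # 'nothing' skips FeatureExtractor; 'conv' wires its output size into ModelBuilder
        is_curiosity=False,       # True adds the inverse/forward curiosity models to the joint optimizer
        learning_rate=1e-3,
        memory_size=10_000,
        is_prioritized=False,     # plain vs. prioritized experience replay in Memory
    )
    agent = Agent(args, name='dqn_baseline')  # hypothetical name for the class owning this __init__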