def __init__(self, config, action_mask):
    super(CL_DPG, self).__init__(config)

    # Set Hyper-parameters
    # Initial training phase required if learning embeddings
    self.initial_phase = not config.true_embeddings and not config.load_embed and not config.restore
    self.batch_norm = False

    # Function to get state features and action representation
    self.state_features = Basis.get_Basis(config=config)
    self.action_rep = CL_ActionRepresentation.VAE_Action_representation(
        action_dim=self.action_dim,
        state_dim=self.state_features.feature_dim,
        config=config)

    # Create instances for Actor and Q_fn
    self.actor = Actor(action_dim=self.action_rep.reduced_action_dim,
                       state_dim=self.state_features.feature_dim,
                       config=config)
    self.Q = Q_fn(action_dim=self.action_rep.reduced_action_dim,
                  state_dim=self.state_features.feature_dim,
                  config=config)

    # Create target networks
    # Deepcopy not working.
    self.target_state_features = Basis.get_Basis(config=config)
    self.target_actor = Actor(action_dim=self.action_rep.reduced_action_dim,
                              state_dim=self.state_features.feature_dim,
                              config=config)
    self.target_Q = Q_fn(action_dim=self.action_rep.reduced_action_dim,
                         state_dim=self.state_features.feature_dim,
                         config=config)
    # self.target_action_rep = ActionRepresentation.Action_representation_deep(action_dim=self.action_dim, config=config)

    # Copy the initialized values to target
    self.target_state_features.load_state_dict(self.state_features.state_dict())
    self.target_actor.load_state_dict(self.actor.state_dict())
    self.target_Q.load_state_dict(self.Q.state_dict())
    # self.target_action_rep.load_state_dict(self.action_rep.state_dict())

    self.memory = MemoryBuffer(max_len=self.config.buffer_size,
                               state_dim=self.state_dim,
                               action_dim=1,
                               atype=long,
                               config=config,
                               dist_dim=self.action_rep.reduced_action_dim)  # off-policy

    self.noise = OrnsteinUhlenbeckActionNoise(self.config.reduced_action_dim)

    self.modules = [('actor', self.actor), ('Q', self.Q),
                    ('state_features', self.state_features), ('action_rep', self.action_rep),
                    ('target_actor', self.target_actor),
                    ('target_state_features', self.target_state_features),
                    ('target_Q', self.target_Q)]  # ,
                    # ('target_action_rep', self.target_action_rep)]

    self.init()
    self.update_mask(action_mask=action_mask)
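# --- Illustrative sketch (not from the source) ---------------------------------
# DPG-style agents that keep target copies of the actor, critic, and feature
# networks, as initialized above, typically refresh them with a Polyak (soft)
# update. This is a minimal sketch of that idea, assuming PyTorch modules; the
# name `soft_update` and the `tau` value are illustrative, not part of the repo.
import torch

def soft_update(target: torch.nn.Module, source: torch.nn.Module, tau: float = 0.005) -> None:
    """Polyak average: theta_target <- tau * theta_source + (1 - tau) * theta_target."""
    with torch.no_grad():
        for t_param, s_param in zip(target.parameters(), source.parameters()):
            t_param.mul_(1.0 - tau).add_(tau * s_param)
# --------------------------------------------------------------------------------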
def __init__(self, config, action_mask):
    super(CL_ActorCritic, self).__init__(config)

    # Initial training phase required if learning embeddings from scratch
    self.initial_phase = not config.true_embeddings and not config.load_embed

    # Function to get state features and action representation
    self.state_features = Basis.get_Basis(config=config)
    self.action_rep = CL_ActionRepresentation.VAE_Action_representation(
        state_dim=self.state_features.feature_dim,
        action_dim=self.action_dim,
        config=config)

    # Create instances for Actor and Critic
    self.critic = Critic.Critic_with_traces(state_dim=self.state_features.feature_dim, config=config)
    self.actor = Policy.embed_Gaussian(action_dim=self.action_rep.reduced_action_dim,
                                       state_dim=self.state_features.feature_dim,
                                       config=config)

    # Initialize storage containers
    self.memory = MemoryBuffer(max_len=self.config.buffer_size, state_dim=self.state_dim,
                               action_dim=1, atype=long, config=config,
                               dist_dim=self.action_rep.reduced_action_dim)  # off-policy
    self.trajectory = Trajectory(max_len=self.config.batch_size, state_dim=self.state_dim,
                                 action_dim=1, atype=long, config=config,
                                 dist_dim=self.action_rep.reduced_action_dim)  # on-policy

    self.modules = [('actor', self.actor), ('critic', self.critic),
                    ('state_features', self.state_features), ('action_rep', self.action_rep)]

    self.init()
    self.update_mask(action_mask=action_mask)
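# --- Illustrative sketch (not from the source) ---------------------------------
# The storage containers above are constructed with (state_dim, action_dim=1,
# dist_dim) arguments, suggesting each transition stores a state, an integer
# action index, and an action embedding of size dist_dim. A minimal ring buffer
# with that layout might look like the sketch below; the real MemoryBuffer API
# is not shown in the source, so all names here are assumptions.
import numpy as np

class SimpleReplayBuffer:
    def __init__(self, max_len: int, state_dim: int, dist_dim: int):
        self.max_len, self.idx, self.full = max_len, 0, False
        self.s = np.zeros((max_len, state_dim), dtype=np.float32)
        self.a = np.zeros((max_len, 1), dtype=np.int64)             # discrete action index
        self.emb = np.zeros((max_len, dist_dim), dtype=np.float32)  # action embedding
        self.r = np.zeros((max_len, 1), dtype=np.float32)
        self.s2 = np.zeros((max_len, state_dim), dtype=np.float32)
        self.done = np.zeros((max_len, 1), dtype=np.float32)

    def add(self, s, a, emb, r, s2, done):
        i = self.idx
        self.s[i], self.a[i], self.emb[i] = s, a, emb
        self.r[i], self.s2[i], self.done[i] = r, s2, done
        self.idx = (i + 1) % self.max_len
        self.full = self.full or self.idx == 0

    def sample(self, batch_size: int):
        high = self.max_len if self.full else self.idx
        j = np.random.randint(0, high, size=batch_size)
        return self.s[j], self.a[j], self.emb[j], self.r[j], self.s2[j], self.done[j]
# --------------------------------------------------------------------------------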
def __init__(self, config):
    super(embed_Reinforce, self).__init__(config)

    # Per-episode storage
    self.ep_rewards = []
    self.ep_states = []
    self.ep_actions = []
    self.ep_exec_action_embs = []
    self.ep_chosen_action_embs = []

    # Set Hyper-parameters
    self.memory = MemoryBuffer(size=config.buffer_size)
    self.counter = 0
    self.initial_phase = not config.true_embeddings  # Initial training phase required if learning embeddings

    # Function to get state features
    if config.fourier_order > 0:
        self.state_features = Basis.Fourier_Basis(config=config)
    else:
        self.state_features = Basis.NN_Basis(config=config)

    # Function to get action representation
    self.action_rep = Action_representation(state_dim=self.state_features.feature_dim,
                                            action_dim=self.action_dim,
                                            config=config)

    self.baseline = Critic.Critic(state_dim=self.state_features.feature_dim, config=config)

    # Create instance for Actor
    self.atype = config.dtype
    self.actor = Policy.embed_Gaussian(action_dim=self.action_rep.reduced_action_dim,
                                       state_dim=self.state_features.feature_dim,
                                       config=config)
    self.action_size = self.action_dim

    self.modules = [('actor', self.actor), ('baseline', self.baseline),
                    ('state_features', self.state_features), ('action_rep', self.action_rep)]
    self.init()
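# --- Illustrative sketch (not from the source) ---------------------------------
# REINFORCE-style agents accumulate per-episode lists like `ep_rewards` above and
# convert them into discounted returns G_t = sum_k gamma^k * r_{t+k} at the end
# of the episode. A minimal sketch of that standard computation is shown below;
# the function name and the `gamma` default are illustrative, not from the repo.
from typing import List

def discounted_returns(rewards: List[float], gamma: float = 0.99) -> List[float]:
    returns: List[float] = []
    g = 0.0
    for r in reversed(rewards):       # accumulate from the end of the episode
        g = r + gamma * g
        returns.append(g)
    returns.reverse()                 # restore chronological order
    return returns
# --------------------------------------------------------------------------------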
def __init__(self, config, action_mask):
    super(CL_Vanilla_ActorCritic, self).__init__(config)

    # Get state features and instances for Actor and Value function
    self.state_features = Basis.get_Basis(config=config)
    self.actor, self.atype, self.action_size = Policy.get_Policy(
        state_dim=self.state_features.feature_dim, config=config)
    self.critic = Critic.Critic(state_dim=self.state_features.feature_dim, config=config)

    self.trajectory = utils.Trajectory(max_len=self.config.batch_size, state_dim=self.state_dim,
                                       action_dim=self.action_size, atype=self.atype,
                                       config=config, dist_dim=1)

    self.modules = [('actor', self.actor), ('baseline', self.critic),
                    ('state_features', self.state_features)]
    self.init()
    self.update_mask(action_mask=action_mask)
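# --- Illustrative sketch (not from the source) ---------------------------------
# A critic used as a baseline, as above, is typically trained on the one-step TD
# error, which also serves as the advantage estimate for the actor. This is a
# minimal sketch of that standard quantity, assuming PyTorch tensors; the
# function name, tensor names, and `gamma` default are illustrative.
import torch

def td_error(r: torch.Tensor, v_s: torch.Tensor, v_s_next: torch.Tensor,
             done: torch.Tensor, gamma: float = 0.99) -> torch.Tensor:
    """delta = r + gamma * V(s') * (1 - done) - V(s)."""
    return r + gamma * v_s_next * (1.0 - done) - v_s
# --------------------------------------------------------------------------------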
def __init__(self, config):
    super(OFPG, self).__init__(config)

    # Get state features and an instance for the Actor
    self.state_features = Basis.get_Basis(config=config)
    self.actor, self.atype, self.action_size = NS_utils.get_Policy(
        state_dim=self.state_features.feature_dim, config=config)
    self.memory = utils.TrajectoryBuffer(buffer_size=config.buffer_size, state_dim=self.state_dim,
                                         action_dim=self.action_size, atype=self.atype,
                                         config=config, dist_dim=1)

    self.modules = [('actor', self.actor), ('state_features', self.state_features)]
    self.counter = 0
    self.init()
def __init__(self, config):
    super(ProOLS, self).__init__(config)

    # Get state features and an instance for the Actor
    self.state_features = Basis.get_Basis(config=config)
    self.actor, self.atype, self.action_size = NS_utils.get_Policy(
        state_dim=self.state_features.feature_dim, config=config)
    self.memory = utils.TrajectoryBuffer(buffer_size=config.buffer_size, state_dim=self.state_dim,
                                         action_dim=self.action_size, atype=self.atype,
                                         config=config, dist_dim=1)

    # Least-squares extrapolator
    self.extrapolator = OLS(max_len=config.buffer_size, delta=config.delta,
                            basis_type=config.extrapolator_basis, k=config.fourier_k)

    self.modules = [('actor', self.actor), ('state_features', self.state_features)]
    self.counter = 0
    self.init()
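# --- Illustrative sketch (not from the source) ---------------------------------
# The `OLS` extrapolator above is configured with a forecast horizon `delta`, a
# basis type, and a Fourier order `k`. One plausible reading, sketched below, is
# an ordinary least-squares fit of past scalar values against normalized time
# using Fourier features, extrapolated `delta` steps ahead. The real OLS class is
# not shown in the source, so this is an assumption about its behavior.
import numpy as np

def fourier_features(t: np.ndarray, k: int) -> np.ndarray:
    # Columns: [1, cos(pi*1*t), ..., cos(pi*k*t)] for t normalized to [0, 1].
    return np.column_stack([np.cos(np.pi * i * t) for i in range(k + 1)])

def ols_extrapolate(values: np.ndarray, delta: int, k: int) -> float:
    n = len(values)
    t = np.arange(n) / max(n - 1, 1)               # normalized time indices
    phi = fourier_features(t, k)                   # design matrix
    w, *_ = np.linalg.lstsq(phi, values, rcond=None)
    t_future = (n - 1 + delta) / max(n - 1, 1)     # extrapolate delta steps ahead
    return float(fourier_features(np.array([t_future]), k) @ w)
# --------------------------------------------------------------------------------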