def __init__(self, env_name, info=False, gamma=0.9, entropy_beta=0.01, global_update_step=20):
    self.env = gym.make(env_name)
    num_inputs = self.env.observation_space.shape[0]
    num_actions = self.env.action_space.shape[0]
    # init some other parameters...
    self.gamma = gamma
    self.entropy_beta = entropy_beta
    self.global_update_step = global_update_step
    self.info = info
    # build up the worker's own local networks...
    self.value_network_local = models.Value(num_inputs)
    self.policy_network_local = models.Policy(num_inputs, num_actions)
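The local copies above exist so each A3C worker can act and accumulate gradients independently before touching the shared global networks. A common synchronization step looks like the sketch below; the method and the names of the global networks are illustrative assumptions, not taken from the source:

def _sync_with_global(self, policy_network_global, value_network_global):
    # pull the latest shared parameters into this worker's local copies
    # before the next rollout (method and argument names are assumed)
    self.policy_network_local.load_state_dict(policy_network_global.state_dict())
    self.value_network_local.load_state_dict(value_network_global.state_dict())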
def __init__(self, args, env):
    # store the arguments and the environment...
    self.args = args
    self.env = env
    # define the number of inputs and the number of actions
    num_inputs = self.env.observation_space.shape[0]
    num_actions = self.env.action_space.shape[0]
    # define the model save dir...
    self.saved_path = self.args.save_dir + self.args.env_name + '/'
    # create the save directories if they do not exist yet
    if not os.path.exists(self.args.save_dir):
        os.mkdir(self.args.save_dir)
    if not os.path.exists(self.saved_path):
        os.mkdir(self.saved_path)
    # define the networks...
    self.policy_network = models.Policy(num_inputs, num_actions)
    self.value_network = models.Value(num_inputs)
    # define the optimizer (only the value network is trained with Adam here)
    self.optimizer_value = torch.optim.Adam(self.value_network.parameters(),
                                            lr=self.args.value_lr,
                                            weight_decay=self.args.l2_reg)
    # init the observation filter...
    self.running_state = ZFilter((num_inputs,), clip=5)
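`ZFilter((num_inputs,), clip=5)` is the running mean/std observation normalizer common in PyTorch TRPO-style code: it standardizes each observation with running statistics and clips the result to [-5, 5]. A rollout fragment showing how `running_state` is typically applied (the loop and the `select_action` helper are illustrative assumptions, not source code):

# normalize every raw observation with the running filter before it
# reaches the networks; this surrounding loop is a sketch only
state = self.running_state(self.env.reset())
while True:
    action = select_action(state)  # hypothetical helper
    state, reward, done, _ = self.env.step(action)
    state = self.running_state(state)
    if done:
        break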
def __init__(self, env, args):
    # define the parameters...
    self.env = env
    # get the environment's input size and output size
    num_inputs = self.env.observation_space.shape[0]
    num_actions = self.env.action_space.shape[0]
    # get the parameters
    self.args = args
    self.saved_path = 'saved_models/' + str(self.args.env_name) + '/'
    # create the save path if it does not exist; makedirs also creates the
    # parent 'saved_models/' directory (plain mkdir would fail without it)
    if not os.path.exists(self.saved_path):
        os.makedirs(self.saved_path)
    # check if cuda is available...
    self.use_cuda = torch.cuda.is_available() and self.args.cuda
    print('CUDA is available: ' + str(torch.cuda.is_available()))
    print('Using CUDA: ' + str(self.args.cuda))
    # define the networks...
    self.policy_network = models.Policy(num_inputs, num_actions)
    self.value_network = models.Value(num_inputs)
    if self.use_cuda:
        self.policy_network.cuda()
        self.value_network.cuda()
    # define the optimizers
    self.optimizer_value = torch.optim.Adam(self.value_network.parameters(),
                                            lr=self.args.value_lr,
                                            weight_decay=self.args.l2_reg)
    self.optimizer_policy = torch.optim.Adam(self.policy_network.parameters(),
                                             lr=self.args.policy_lr,
                                             weight_decay=self.args.l2_reg)
    # init the observation filter...
    self.running_state = ZFilter((num_inputs,), clip=5)
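Since only the `__init__` is shown, a hedged instantiation sketch follows; the argument names mirror exactly the fields this constructor reads, the defaults are illustrative, and `Agent` is a placeholder for the class name, which the snippet does not show:

import argparse
import gym

parser = argparse.ArgumentParser()
parser.add_argument('--env_name', default='Walker2d-v1')  # illustrative default
parser.add_argument('--cuda', action='store_true')
parser.add_argument('--value_lr', type=float, default=3e-4)
parser.add_argument('--policy_lr', type=float, default=3e-4)
parser.add_argument('--l2_reg', type=float, default=1e-3)
args = parser.parse_args()

env = gym.make(args.env_name)
agent = Agent(env, args)  # `Agent` stands in for the class that owns this __init__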
def post_evaluate(policies_dict, add_noise_post=False, n_test_episodes=5):
    '''Test each of the n best policies for n_test_episodes episodes.

    policies_dict  - structure with two lists: paths to policy parameters and
                     the reward obtained by each policy.
    add_noise_post - if True, adds parameter noise with the same sigma (std)
                     as was used during training.
    '''
    # after collecting the best n policies, post-evaluate all of them to pick the best one
    post_evaluation = {}
    for policy in policies_dict.paths:
        value_path = policy + "_value"
        policy_path = policy + "_policy"
        env = gym.make(args.env)
        num_inputs = env.observation_space.shape[0]
        num_actions = env.action_space.shape[0]
        # load the pretrained networks
        if 'Nonoise' in policy:
            policy_layer = Policy(num_inputs, num_actions)
        else:
            policy_layer = PolicyLayerNorm(num_inputs, num_actions)
        value_layer = models.Value(num_inputs)
        policy_layer.load_state_dict(torch.load(policy_path))
        value_layer.load_state_dict(torch.load(value_path))
        if add_noise_post and 'Nonoise' not in policy:
            # recover the sigma that was used for this policy during training
            right_part = policy.split('seed')[1]
            current_seed = right_part.split('/')[0]
            current_setting = policy.split('/')[-1]
            sigma_path = args.src + "/seed" + current_seed + "/sigma_behaviour/" + current_setting
            # note: only the first digit after 'episode_' is parsed here
            sigma_episode = int(policy.split('episode_')[1][0])
            with open(sigma_path, 'rb') as f:
                sigmas = pickle.load(f)
            current_sigma = sigmas[sigma_episode]
        rewards_batch = []
        for i_episode in range(n_test_episodes):
            # seed to create identical initial conditions for all policies
            env.seed(i_episode)
            torch.manual_seed(i_episode)
            state = env.reset()
            reward_sum = 0
            for t in range(1000):
                state = torch.FloatTensor(state)
                if add_noise_post and 'Nonoise' not in policy:
                    action = select_action(policy_layer, state, current_sigma, noise=True)
                else:
                    action = select_action(policy_layer, state, noise=False)
                action = action.data[0].numpy()
                next_state, reward, done, _ = env.step(action)
                reward_sum += reward
                # next_state = running_state(next_state)
                if done:
                    break
                state = next_state
            rewards_batch.append(reward_sum)
        post_evaluation[policy] = sum(rewards_batch) / len(rewards_batch)
    return post_evaluation
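Despite its name, `post_evaluate` only reads `policies_dict.paths`, so any object with a `paths` attribute works; it also relies on a module-level `args` providing `env` and `src`. A hedged invocation sketch (the checkpoint prefix is hypothetical; "<prefix>_policy" and "<prefix>_value" files must exist on disk):

from types import SimpleNamespace

policies = SimpleNamespace(paths=['runs/seed0/episode_3_Nonoise'])  # hypothetical prefix
scores = post_evaluate(policies, add_noise_post=False, n_test_episodes=5)
for prefix, avg_reward in scores.items():
    print(prefix, avg_reward)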
import gym
import mujoco_py

import models
import ppo_agent

env = gym.make('Humanoid-v1')
num_inputs = env.observation_space.shape[0]
num_actions = env.action_space.shape[0]
print('The number of states is ' + str(num_inputs))
print('The number of actions is ' + str(num_actions))

policy_network = models.Policy(num_inputs, num_actions)
value_network = models.Value(num_inputs)

ppo_man = ppo_agent.ppo_brain(env, policy_network, value_network, use_cuda=False)
ppo_man.test_network('saved_models/Humanoid-v1/policy_net_model_400.pt')
import multiprocessing

import gym
import torch

import models
from a3c_agent_continues import A3C_Workers

torch.set_default_tensor_type('torch.DoubleTensor')

if __name__ == '__main__':
    env_name = 'Pendulum-v0'
    save_path = 'saved_models/Pendulum-v0/'
    # the number of cpus...
    num_of_workers = multiprocessing.cpu_count()
    env = gym.make(env_name)
    num_inputs = env.observation_space.shape[0]
    num_actions = env.action_space.shape[0]
    # build up the central (global) networks...
    value_network_global = models.Value(num_inputs)
    policy_network_global = models.Policy(num_inputs, num_actions)
    # put the global parameters in shared memory so all workers can update them
    value_network_global.share_memory()
    policy_network_global.share_memory()
    # build up the workers...
    workers = []
    processor = []
    #worker_test = A3C_Workers(env_name)
    #worker_test.test_the_network(path='saved_models/policy_model_3700.pt')
    for idx in range(num_of_workers):
        if idx == 0:
            # only the first worker is flagged with info=True
            workers.append(A3C_Workers(env_name, True))
        else:
            workers.append(A3C_Workers(env_name))
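The snippet ends inside the worker loop; a typical continuation for scripts of this shape launches each worker in its own process and joins them, as sketched below. The `train` method name and its arguments are assumptions, not taken from the source:

# hypothetical continuation: run every worker in a separate process so they
# update the shared global networks asynchronously
for worker in workers:
    p = multiprocessing.Process(target=worker.train,
                                args=(policy_network_global, value_network_global))
    p.start()
    processor.append(p)
for p in processor:
    p.join()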
def create_value(db: Session, value: schemas.ValueCreate):
    # build the ORM object from the incoming schema
    db_value = models.Value(title=value.title, description=value.description)
    db.add(db_value)
    db.commit()
    # refresh to populate database-generated fields such as the primary key
    db.refresh(db_value)
    return db_value
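A minimal usage sketch for `create_value`, assuming the usual FastAPI/SQLAlchemy project layout where `SessionLocal` is the session factory and `schemas.ValueCreate` is a Pydantic model exposing `title` and `description` (the module names below are assumptions):

import schemas
from database import SessionLocal  # assumed module layout

db = SessionLocal()
try:
    payload = schemas.ValueCreate(title="uptime", description="service uptime in seconds")
    created = create_value(db, payload)
    print(created.id, created.title)  # `id` assumed to be the DB-generated primary key
finally:
    db.close()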