def __init__(self, g_list, test_g_list, env):
    """Initialise the agent's graphs, networks, optimizer and schedule state.

    Args:
        g_list: Sequence of training graphs; its length sizes the shuffled
            sampling index list.
        test_g_list: Graphs used for evaluation. When ``None`` the training
            graphs in ``g_list`` are reused.
        env: Environment object stored on the agent as ``self.env``.
    """
    self.g_list = g_list
    self.test_g_list = g_list if test_g_list is None else test_g_list
    self.env = env

    # Build the online network first and the old copy second (same
    # construction order as before, so random initialisation is unchanged).
    self.net = QNet()
    self.old_net = QNet()

    # Move the networks to the GPU *before* building the optimizer: the
    # torch.optim documentation asks for parameters to live on their final
    # device when the optimizer is constructed.
    if cmd_args.ctx == 'gpu':
        self.net = self.net.cuda()
        self.old_net = self.old_net.cuda()
    self.optimizer = optim.Adam(self.net.parameters(), lr=cmd_args.learning_rate)

    # Epsilon schedule parameters. Start and end are both 1.0 here.
    self.eps_start = 1.0
    self.eps_end = 1.0
    self.eps_step = 10000
    # NOTE(review): presumably the number of initial iterations used to
    # fill the replay memory before training starts -- confirm with caller.
    self.burn_in = 100

    self.step = 0
    self.best_eval = None
    self.pos = 0

    # Visit the training graphs in a random order.
    self.sample_idxes = list(range(len(g_list)))
    random.shuffle(self.sample_idxes)

    self.take_snapshot()
def agent_init(self, **options):
    """Load or build the CNN feature extractor, then create the Q-network.

    Args:
        **options: Must contain ``'use_gpu'`` and ``'depth_image_dim'``.

    Raises:
        KeyError: If a required option is missing.
    """
    self.use_gpu = options['use_gpu']
    self.depth_image_dim = options['depth_image_dim']

    # Q-network input = all image features plus the depth image.
    self.q_net_input_dim = (self.image_feature_dim * self.image_feature_count
                            + self.depth_image_dim)

    if os.path.exists(self.cnn_feature_extractor):
        print("loading... " + self.cnn_feature_extractor)
        # NOTE(review): unpickling executes arbitrary code; only point
        # cnn_feature_extractor at a trusted file.
        with open(self.cnn_feature_extractor, 'rb') as f:
            self.feature_extractor = pickle.load(f)
        print("done")
    else:
        print('there is no chainer alexnet model file ', self.cnn_feature_extractor)
        print('making chainer model from ', self.model)
        print('this process take a tens of minutes.')
        self.feature_extractor = CnnFeatureExtractor(
            self.use_gpu, self.model, self.model_type, self.image_feature_dim)
        # Use a context manager so the cache file is flushed and closed
        # deterministically instead of relying on garbage collection.
        with open(self.cnn_feature_extractor, 'wb') as f:
            pickle.dump(self.feature_extractor, f)
        print("pickle.dump finished")

    self.time = 0
    self.epsilon = 1.0  # Initial exploration rate
    self.q_net = QNet(self.use_gpu, self.actions, self.q_net_input_dim)
def agent_init(self, **options):
    """Copy sensor/model options onto the agent and build its Q-network.

    Args:
        **options: Must contain ``'use_gpu'``, ``'agent_count'``,
            ``'rgb_image_count'``, ``'depth_image_dim'``, ``'ir_dim'``,
            ``'ground_dim'``, ``'compass_dim'``, ``'target_dim'`` and
            ``'model'``.

    Raises:
        KeyError: If one of the options above is missing.
    """
    # (attribute name, option key) pairs, copied in this order.
    # NOTE(review): the attribute name ``ir_idm`` looks like a typo of
    # ``ir_dim``; it is kept because other code may read it.
    option_map = (
        ('use_gpu', 'use_gpu'),
        ('agent_count', 'agent_count'),
        ('image_count', 'rgb_image_count'),
        ('depth_image_dim', 'depth_image_dim'),
        ('ir_idm', 'ir_dim'),
        ('ground_dim', 'ground_dim'),
        ('compass_dim', 'compass_dim'),
        ('target_dim', 'target_dim'),
        ('model', 'model'),
    )
    for attr_name, option_key in option_map:
        setattr(self, attr_name, options[option_key])

    # Derived input sizes.
    self.cnn_input_dim = self.image_dim * self.image_count
    self.feature_dim = self.image_feature_dim * self.image_feature_count
    non_image_dims = (self.depth_image_dim, self.ir_idm, self.ground_dim,
                      self.compass_dim, self.target_dim)
    self.other_input_dim = sum(non_image_dims)

    self.time = 1
    self.epsilon = 1.0
    self.avgloss_log_file = self.avgloss_log + "avg_loss.log"

    # When a model is given (anything other than the string 'None'),
    # unfreeze the policy and use a lower exploration rate.
    model_given = self.model != 'None'
    if model_given:
        self.policy_frozen = False
        self.epsilon = 0.5

    self.q_net = QNet(
        self.use_gpu,
        self.actions,
        self.cnn_input_dim,
        self.feature_dim,
        self.agent_count,
        self.other_input_dim,
        self.model,
    )
def __init__(self, obs_dims, act_dim, lr=1e-3, gamma=0.99,
             replay_buffer_size=10000, batch_size=64, epsilon_min=0.01,
             epsilon_dec=5e-5, target_update_frequency=64):
    """Create the replay buffer, the two Q-networks, and training state.

    Args:
        obs_dims: Observation dimensions, passed to ``ReplayBuffer`` and
            ``QNet``.
        act_dim: Action dimension, passed to ``QNet``.
        lr: Learning rate for the Adam optimizer.
        gamma: Stored as ``self.gamma``.
        replay_buffer_size: Capacity argument for ``ReplayBuffer``.
        batch_size: Stored as ``self.batch_size``.
        epsilon_min: Stored as ``self.epsilon_min``.
        epsilon_dec: Stored as ``self.epsilon_dec``.
        target_update_frequency: Stored as ``self.target_update_frequency``.
    """
    # Experience storage.
    replay = ReplayBuffer(replay_buffer_size, obs_dims)
    self.buffer = replay
    self.batch_size = batch_size

    # Evaluation network first, target network second.
    eval_net = QNet(obs_dims, act_dim)
    target_net = QNet(obs_dims, act_dim)
    self.q_eval, self.q_target = eval_net, target_net

    self.obs_dims, self.act_dim = obs_dims, act_dim

    # Learning bookkeeping.
    self.learn_ctr = 0
    self.target_update_frequency = target_update_frequency
    self.gamma = gamma

    # Exploration starts at 1; epsilon_min and epsilon_dec are stored
    # alongside it.
    self.epsilon = 1
    self.epsilon_min = epsilon_min
    self.epsilon_dec = epsilon_dec

    # Only the evaluation network's parameters are optimised.
    self.optimizer = torch.optim.Adam(eval_net.parameters(), lr=lr)
    self.loss_fn = torch.nn.MSELoss()
def agent_init(self, **options):
    """Load or build the CNN feature extractor, then create the Q-network.

    Args:
        **options: Must contain ``'use_gpu'``.

    Raises:
        KeyError: If ``'use_gpu'`` is missing from *options*.
    """
    self.use_gpu = options['use_gpu']

    # Only image features feed the Q-network here; the depth image
    # dimension is neither read from options nor added to the input size.
    self.q_net_input_dim = self.image_feature_dim * self.image_feature_count

    if os.path.exists(self.cnn_feature_extractor):
        print("loading... " + self.cnn_feature_extractor)
        # Pickle data is bytes: open in binary mode and close the handle.
        # NOTE(review): unpickling executes arbitrary code; only point
        # cnn_feature_extractor at a trusted file.
        with open(self.cnn_feature_extractor, 'rb') as f:
            self.feature_extractor = pickle.load(f)
        print("done")
    else:
        self.feature_extractor = CnnFeatureExtractor(
            self.use_gpu, self.model, self.model_type, self.image_feature_dim)
        # Binary mode is required for pickle on Python 3 and avoids newline
        # translation on Windows.
        with open(self.cnn_feature_extractor, 'wb') as f:
            pickle.dump(self.feature_extractor, f)
        print("pickle.dump finished")

    self.time = 0
    self.epsilon = 1.0  # Initial exploration rate
    self.q_net = QNet(self.use_gpu, self.actions, self.q_net_input_dim)
def agent_init(self, **options):
    """Configure the agent, load or build its feature extractor and Q-network.

    Any exception raised during initialisation is printed with its
    traceback and the process exits with a non-zero status.

    Args:
        **options: Must contain ``'image_count'``, ``'depth_image_dim'``,
            ``'use_gpu'``, ``'test'``, ``'folder'`` and ``'model_num'``.
    """
    try:
        self.image_count = options['image_count']
        self.depth_image_dim = options['depth_image_dim']
        self.use_gpu = options['use_gpu']
        self.test = options['test']
        # Kept on self because save_model uses it.
        self.folder = options["folder"]
        model_num = options['model_num']

        self.q_net_input_dim = (self.image_feature_dim * self.image_count
                                + self.depth_image_dim * self.image_count)

        if os.path.exists(self.cnn_feature_extractor):
            print("loading... " + self.cnn_feature_extractor)
            # Pickle data is bytes: open in binary mode and close the handle.
            # NOTE(review): unpickling executes arbitrary code; only point
            # cnn_feature_extractor at a trusted file.
            with open(self.cnn_feature_extractor, 'rb') as f:
                self.feature_extractor = pickle.load(f)
            print("done")
        else:
            self.feature_extractor = CnnFeatureExtractor(
                self.use_gpu, self.model, self.model_type,
                self.image_feature_dim)
            with open(self.cnn_feature_extractor, 'wb') as f:
                pickle.dump(self.feature_extractor, f)
            print("pickle.dump finished")

        self.q_net = QNet(self.use_gpu, self.actions, self.q_net_input_dim)

        # Start one step past the saved model number to prevent a save and
        # a load from happening at the same time.
        self.time = model_num + 1

        if self.test:
            self.epsilon = 0.0
        else:
            # Decay epsilon by the steps taken beyond the initial
            # exploration phase, never going below min_eps.
            non_exploration = max(
                self.time - self.q_net.initial_exploration, 0)
            self.epsilon = max(1.0 - non_exploration * self.epsilon_delta,
                               self.min_eps)
        # Single-argument call form prints the same text on Python 2 and 3.
        print("epsilon =  %s" % (self.epsilon,))

        if self.test or model_num > 0:
            self.q_net.load_model(self.folder, model_num)
    except Exception:
        # Catch Exception rather than everything, so Ctrl-C and explicit
        # exits propagate normally.
        import traceback
        import sys
        traceback.print_exc()
        # Exit with a failure status so supervisors can detect the error.
        sys.exit(1)
def agent_init(self, **options):
    """Load or build the CNN feature extractor, then create the Q-network.

    Args:
        **options: Must contain ``'use_gpu'`` and ``'pad_states_dim'``.

    Raises:
        KeyError: If a required option is missing.
    """
    self.use_gpu = options['use_gpu']
    self.pad_state_dim = options['pad_states_dim']

    # Q-network input = image features plus the pad state.
    self.q_net_input_dim = self.image_feature_dim + self.pad_state_dim

    if os.path.exists(self.cnn_feature_extractor):
        print("loading... " + self.cnn_feature_extractor)
        # The cache is written in binary mode below, so it must be read in
        # binary mode too; the context manager closes the handle.
        # NOTE(review): unpickling executes arbitrary code; only point
        # cnn_feature_extractor at a trusted file.
        with open(self.cnn_feature_extractor, 'rb') as f:
            self.feature_extractor = pickle.load(f)
    else:
        print("pickle.dump start")
        self.feature_extractor = CnnFeatureExtractor(
            self.use_gpu, self.model, self.model_type, self.image_feature_dim)
        with open(self.cnn_feature_extractor, 'wb') as f:
            pickle.dump(self.feature_extractor, f)
        print("pickle.dump finished")

    self.time = 0
    self.epsilon = 1.0  # Initial exploration rate
    self.q_net = QNet(self.use_gpu, self.num_of_action_type,
                      self.num_of_pad, self.q_net_input_dim)
def agent_init(self):
    """Create this agent's Q-network from attributes already set on self.

    Reads ``self.use_gpu``, ``self.q_net_input_dim`` and ``self.agent_id``
    and stores the new network as ``self.q_net``.
    """
    net_args = (self.use_gpu, self.q_net_input_dim, self.agent_id)
    self.q_net = QNet(*net_args)