Exemplo n.º 1
0
    def __init__(self, g_list, test_g_list, env):
        """Create the agent's networks, optimizer and bookkeeping state.

        Args:
            g_list: Training graphs; their indices are shuffled into
                ``self.sample_idxes``.
            test_g_list: Evaluation graphs. When ``None``, ``g_list`` is
                reused for evaluation.
            env: Environment object, stored on the instance as ``self.env``.
        """
        self.g_list = g_list
        # Without a dedicated test set, evaluate on the training graphs.
        self.test_g_list = g_list if test_g_list is None else test_g_list
        self.env = env

        # Two separately constructed networks; the optimizer only tracks
        # the parameters of ``self.net``.
        self.net, self.old_net = QNet(), QNet()
        self.optimizer = optim.Adam(
            self.net.parameters(), lr=cmd_args.learning_rate)

        on_gpu = cmd_args.ctx == 'gpu'
        if on_gpu:
            self.net = self.net.cuda()
            self.old_net = self.old_net.cuda()

        # Epsilon schedule settings. As written, start and end are equal.
        self.eps_start = self.eps_end = 1.0
        self.eps_step = 10000
        # NOTE(review): presumably the number of initial iterations run to
        # fill the memory before learning starts -- confirm against usage.
        self.burn_in = 100
        self.step = 0

        self.best_eval = None
        self.pos = 0

        # Randomised visiting order over the training graphs.
        shuffled = list(range(len(g_list)))
        random.shuffle(shuffled)
        self.sample_idxes = shuffled

        self.take_snapshot()
Exemplo n.º 2
0
    def agent_init(self, **options):
        """Initialise the feature extractor, counters and Q-network.

        If the file named by ``self.cnn_feature_extractor`` exists, the
        feature extractor is unpickled from it. Otherwise a new
        ``CnnFeatureExtractor`` is built and pickled to that path so later
        runs can load it.

        Args:
            **options: Must contain ``'use_gpu'`` and ``'depth_image_dim'``.

        Raises:
            KeyError: If a required option is missing.
        """
        self.use_gpu = options['use_gpu']
        self.depth_image_dim = options['depth_image_dim']
        self.q_net_input_dim = self.image_feature_dim * self.image_feature_count + self.depth_image_dim

        if os.path.exists(self.cnn_feature_extractor):
            print("loading... " + self.cnn_feature_extractor)
            # NOTE: unpickling executes arbitrary code; this cache file must
            # come from a trusted source.
            with open(self.cnn_feature_extractor, 'rb') as f:
                self.feature_extractor = pickle.load(f)
            print("done")
        else:
            print('there is no chainer alexnet model file ',
                  self.cnn_feature_extractor)
            print('making chainer model from ', self.model)
            print('this process take a tens of minutes.')
            self.feature_extractor = CnnFeatureExtractor(
                self.use_gpu, self.model, self.model_type,
                self.image_feature_dim)
            # Use a context manager so the cache file is flushed and closed
            # deterministically instead of whenever the handle is collected.
            with open(self.cnn_feature_extractor, 'wb') as f:
                pickle.dump(self.feature_extractor, f)
            print("pickle.dump finished")

        self.time = 0
        self.epsilon = 1.0  # Initial exploration rate
        self.q_net = QNet(self.use_gpu, self.actions, self.q_net_input_dim)
Exemplo n.º 3
0
    def agent_init(self, **options):
        """Copy the run options onto the agent and build its Q-network.

        Args:
            **options: Must contain the keys ``'use_gpu'``,
                ``'agent_count'``, ``'rgb_image_count'``,
                ``'depth_image_dim'``, ``'ir_dim'``, ``'ground_dim'``,
                ``'compass_dim'``, ``'target_dim'`` and ``'model'``.

        Raises:
            KeyError: If any of the keys above is missing.
        """
        # (attribute name, option key) pairs, applied in this order. The
        # attribute spelling ``ir_idm`` is kept as-is because it is part of
        # the instance's visible state.
        option_bindings = (
            ('use_gpu', 'use_gpu'),
            ('agent_count', 'agent_count'),
            ('image_count', 'rgb_image_count'),
            ('depth_image_dim', 'depth_image_dim'),
            ('ir_idm', 'ir_dim'),
            ('ground_dim', 'ground_dim'),
            ('compass_dim', 'compass_dim'),
            ('target_dim', 'target_dim'),
            ('model', 'model'),
        )
        for attr_name, option_key in option_bindings:
            setattr(self, attr_name, options[option_key])

        # Derived input sizes handed to the Q-network below.
        self.cnn_input_dim = self.image_dim * self.image_count
        self.feature_dim = self.image_feature_dim * self.image_feature_count
        self.other_input_dim = (self.depth_image_dim
                                + self.ir_idm
                                + self.ground_dim
                                + self.compass_dim
                                + self.target_dim)

        self.time = 1
        self.epsilon = 1.0
        self.avgloss_log_file = self.avgloss_log + "avg_loss.log"

        # The literal string 'None' is the sentinel tested here; any other
        # value unfreezes the policy and lowers the starting epsilon.
        has_model = self.model != 'None'
        if has_model:
            self.policy_frozen = False
            self.epsilon = 0.5

        self.q_net = QNet(
            self.use_gpu,
            self.actions,
            self.cnn_input_dim,
            self.feature_dim,
            self.agent_count,
            self.other_input_dim,
            self.model,
        )
Exemplo n.º 4
0
 def __init__(self, obs_dims, act_dim, lr=1e-3, gamma=0.99,
              replay_buffer_size=10000, batch_size=64, epsilon_min=0.01,
              epsilon_dec=5e-5, target_update_frequency=64):
     """Set up the replay buffer, the two Q-networks and the optimizer.

     Args:
         obs_dims: Observation dimensions, passed to ``ReplayBuffer`` and
             ``QNet``.
         act_dim: Action dimension, passed to ``QNet``.
         lr: Learning rate for the Adam optimizer.
         gamma: Stored as ``self.gamma``.
         replay_buffer_size: Size argument passed to ``ReplayBuffer``.
         batch_size: Stored as ``self.batch_size``.
         epsilon_min: Stored as ``self.epsilon_min``.
         epsilon_dec: Stored as ``self.epsilon_dec``.
         target_update_frequency: Stored as
             ``self.target_update_frequency``.
     """
     # Problem dimensions.
     self.obs_dims = obs_dims
     self.act_dim = act_dim

     # Hyper-parameters and counters.
     self.gamma = gamma
     self.batch_size = batch_size
     self.target_update_frequency = target_update_frequency
     self.learn_ctr = 0

     # Epsilon starts at 1; the min/decrement values are only stored here.
     self.epsilon = 1
     self.epsilon_min = epsilon_min
     self.epsilon_dec = epsilon_dec

     self.buffer = ReplayBuffer(replay_buffer_size, obs_dims)

     # Two independently constructed networks; only ``q_eval`` is
     # registered with the optimizer.
     self.q_eval = QNet(obs_dims, act_dim)
     self.q_target = QNet(obs_dims, act_dim)

     eval_params = self.q_eval.parameters()
     self.optimizer = torch.optim.Adam(eval_params, lr=lr)
     self.loss_fn = torch.nn.MSELoss()
    def agent_init(self, **options):
        """Initialise the feature extractor, counters and Q-network.

        If the file named by ``self.cnn_feature_extractor`` exists, the
        feature extractor is unpickled from it. Otherwise a new
        ``CnnFeatureExtractor`` is built and pickled to that path.

        Args:
            **options: Must contain ``'use_gpu'``.

        Raises:
            KeyError: If ``'use_gpu'`` is missing from ``options``.
        """
        self.use_gpu = options['use_gpu']
        # The depth-image dimension is currently not part of the Q-network
        # input size.
        self.q_net_input_dim = self.image_feature_dim * self.image_feature_count

        if os.path.exists(self.cnn_feature_extractor):
            # The trailing comma is kept on purpose: under Python 2 it
            # suppresses the newline; under Python 3 it is a no-op.
            print("loading... " + self.cnn_feature_extractor),
            # Pickle data is binary: open in 'rb' (text mode fails on
            # Python 3) and close the handle deterministically.
            # NOTE: unpickling executes arbitrary code; this cache file must
            # come from a trusted source.
            with open(self.cnn_feature_extractor, 'rb') as f:
                self.feature_extractor = pickle.load(f)
            print("done")
        else:
            self.feature_extractor = CnnFeatureExtractor(self.use_gpu, self.model, self.model_type, self.image_feature_dim)
            # 'wb' so the cache can be read back with 'rb' above.
            with open(self.cnn_feature_extractor, 'wb') as f:
                pickle.dump(self.feature_extractor, f)
            print("pickle.dump finished")

        self.time = 0
        self.epsilon = 1.0  # Initial exploration rate
        self.q_net = QNet(self.use_gpu, self.actions, self.q_net_input_dim)
    def agent_init(self, **options):
        """Initialise the agent from run options, optionally resuming a model.

        Loads (or builds and caches) the feature extractor, creates the
        Q-network, derives the starting time step and epsilon from
        ``model_num``, and loads saved weights when testing or resuming.

        Any exception raised during initialisation is printed with its
        traceback and the process exits with status 1.

        Args:
            **options: Must contain ``'image_count'``, ``'depth_image_dim'``,
                ``'use_gpu'``, ``'test'``, ``'folder'`` and ``'model_num'``.

        Raises:
            SystemExit: If initialisation fails for any reason.
        """
        try:
            self.image_count = options['image_count']
            self.depth_image_dim = options['depth_image_dim']
            self.use_gpu = options['use_gpu']
            self.test = options['test']
            # Kept on self because save_model uses it as well.
            self.folder = options["folder"]
            model_num = options['model_num']

            self.q_net_input_dim = self.image_feature_dim * self.image_count + self.depth_image_dim * self.image_count

            if os.path.exists(self.cnn_feature_extractor):
                # The trailing comma is kept on purpose: under Python 2 it
                # suppresses the newline; under Python 3 it is a no-op.
                print("loading... " + self.cnn_feature_extractor),
                # Pickle data is binary: open in 'rb' (text mode fails on
                # Python 3) and close the handle deterministically.
                # NOTE: unpickling executes arbitrary code; this cache file
                # must come from a trusted source.
                with open(self.cnn_feature_extractor, 'rb') as f:
                    self.feature_extractor = pickle.load(f)
                print("done")
            else:
                self.feature_extractor = CnnFeatureExtractor(
                    self.use_gpu, self.model, self.model_type,
                    self.image_feature_dim)
                with open(self.cnn_feature_extractor, 'wb') as f:
                    pickle.dump(self.feature_extractor, f)
                print("pickle.dump finished")

            self.q_net = QNet(self.use_gpu, self.actions, self.q_net_input_dim)

            # Start one step past the loaded model number so that a save and
            # a load never happen at the same time step.
            self.time = model_num + 1
            if self.test:
                self.epsilon = 0.0
            else:
                non_exploration = max(
                    self.time - self.q_net.initial_exploration, 0)
                self.epsilon = max(1.0 - non_exploration * self.epsilon_delta,
                                   self.min_eps)
            # Single pre-formatted argument: prints the same text under both
            # the Python 2 print statement and the Python 3 print function.
            print("epsilon =  %s" % (self.epsilon,))

            if self.test or model_num > 0:
                self.q_net.load_model(self.folder, model_num)
        except Exception:
            # `except Exception` lets KeyboardInterrupt/SystemExit propagate
            # instead of being swallowed by a bare `except:`.
            import traceback
            import sys
            traceback.print_exc()
            # Non-zero status so callers can tell initialisation failed.
            sys.exit(1)
Exemplo n.º 7
0
    def agent_init(self, **options):
        """Initialise the feature extractor, counters and Q-network.

        If the file named by ``self.cnn_feature_extractor`` exists, the
        feature extractor is unpickled from it. Otherwise a new
        ``CnnFeatureExtractor`` is built and pickled to that path.

        Args:
            **options: Must contain ``'use_gpu'`` and ``'pad_states_dim'``.

        Raises:
            KeyError: If a required option is missing.
        """
        self.use_gpu = options['use_gpu']
        self.pad_state_dim = options['pad_states_dim']
        self.q_net_input_dim = self.image_feature_dim + self.pad_state_dim

        if os.path.exists(self.cnn_feature_extractor):
            # The trailing comma is kept on purpose: under Python 2 it
            # suppresses the newline; under Python 3 it is a no-op.
            print("loading... " + self.cnn_feature_extractor),
            # The cache is written with 'wb' below, so it must be read back
            # with 'rb'; text mode fails on Python 3.
            # NOTE: unpickling executes arbitrary code; this cache file must
            # come from a trusted source.
            with open(self.cnn_feature_extractor, 'rb') as f:
                self.feature_extractor = pickle.load(f)
        else:
            print("pickle.dump start")
            self.feature_extractor = CnnFeatureExtractor(
                self.use_gpu, self.model, self.model_type,
                self.image_feature_dim)
            # Context manager so the cache file is flushed and closed
            # deterministically.
            with open(self.cnn_feature_extractor, 'wb') as f:
                pickle.dump(self.feature_extractor, f)
            print("pickle.dump finished")

        self.time = 0
        self.epsilon = 1.0  # Initial exploration rate
        self.q_net = QNet(self.use_gpu, self.num_of_action_type,
                          self.num_of_pad, self.q_net_input_dim)
Exemplo n.º 8
0
 def agent_init(self):
     """Build the agent's Q-network and store it as ``self.q_net``.

     Reads ``self.use_gpu``, ``self.q_net_input_dim`` and ``self.agent_id``,
     which must already be set on the instance.
     """
     network = QNet(self.use_gpu, self.q_net_input_dim, self.agent_id)
     self.q_net = network