def __init__(self, model_info, alg_config, **kwargs):
    """Initialize the DQN algorithm for the information-flow task.

    Initialization contains four steps:
    1. override the default config with the user's configuration;
    2. create the default actor via Algorithm.__init__;
    3. create one more actor, named target_actor;
    4. create the replay buffer for training.

    :param model_info: model description, with the network under the "actor" key
    :param alg_config: algorithm configuration
    """
    model_info = model_info["actor"]
    super(DQNInfoFlowAlg, self).__init__(
        alg_name="info_flow_dqn", model_info=model_info, alg_config=alg_config)

    # Separate target network, synchronized with the online actor every
    # `target_update_freq` training steps to stabilize the TD targets.
    self.target_actor = model_builder(model_info)

    self.buff = ReplayBuffer(alg_config.get("buffer_size", BUFFER_SIZE))
    self.batch_size = alg_config.get("batch_size", BATCH_SIZE)
    self.target_update_freq = alg_config.get("target_update_freq", TARGET_UPDATE_FREQ)
    self.gamma = alg_config.get("gamma", GAMMA)
    self.item_dim = alg_config.get("item_dim")
    self.user_dim = alg_config.get("user_dim")

    self.async_flag = False
    self._times = list()
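# The ReplayBuffer used above is imported from the framework; its definition is
# not shown here. The following is a minimal sketch of the interface this
# constructor relies on (a bounded FIFO store with uniform random sampling).
# The class name ReplayBufferSketch and the add/sample method shapes are
# assumptions for illustration, not the framework's actual API.
import random
from collections import deque


class ReplayBufferSketch:
    """Bounded experience store with uniform random sampling (illustrative only)."""

    def __init__(self, capacity):
        # Oldest transitions are evicted first once capacity is reached.
        self._storage = deque(maxlen=capacity)

    def add(self, state, action, reward, next_state, done):
        self._storage.append((state, action, reward, next_state, done))

    def sample(self, batch_size):
        # Uniformly sample a training batch; callers should check len() first.
        return random.sample(self._storage, batch_size)

    def __len__(self):
        return len(self._storage)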
def __init__(self, alg_name, model_info, alg_config=None, **kwargs):
    """Create an algorithm instance from the model info.

    :param alg_name: algorithm name
    :param model_info: model_info["actor"]
    :param alg_config: algorithm configuration
    """
    self.actor = model_builder(model_info)
    self.state_dim = model_info.get("state_dim")
    self.action_dim = model_info.get("action_dim")
    self.train_count = 0
    self.alg_name = alg_name
    self.alg_config = alg_config
    self.async_flag = True

    # Set the default weights map, for compatibility with the single-agent case.
    self._weights_map = self.update_weights_map()

    # Trainable state.
    self._train_ready = True
    self.sync_weights = False

    # Train properties: how many trajectories to prepare before each train step,
    # and how the algorithm distributes itself across explorer instances.
    self._prepare_times_per_train = alg_config.get(
        "prepare_times_per_train",
        alg_config["instance_num"] * alg_config["agent_num"],
    )
    self.dist_model_policy = DefaultAlgDistPolicy(
        alg_config["instance_num"], prepare_times=self._prepare_times_per_train)

    self.learning_starts = alg_config.get("learning_starts", 0)
    self._train_per_checkpoint = alg_config.get("train_per_checkpoint", 1)
    logging.debug("train/checkpoint: {}".format(self._train_per_checkpoint))
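# Illustrative only: an alg_config carrying the keys this constructor reads.
# The key names are taken from the .get()/[] accesses above; the values are
# made-up placeholders, not framework defaults.
example_alg_config = {
    "instance_num": 4,             # required: number of parallel explorer instances
    "agent_num": 1,                # required: agents per instance
    "prepare_times_per_train": 4,  # optional; defaults to instance_num * agent_num
    "learning_starts": 1000,       # optional; defaults to 0
    "train_per_checkpoint": 1,     # optional; defaults to 1
}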
def __init__(self, model_info, alg_config, **kwargs):
    """Initialize the DQN algorithm.

    Initialization contains four steps:
    1. override the default config with the user's configuration;
    2. create the default actor via Algorithm.__init__;
    3. create one more actor, named target_actor;
    4. create the replay buffer for training.

    :param model_info: model description, with the network under the "actor" key
    :param alg_config: algorithm configuration
    """
    # Override module-level defaults (e.g. BUFFER_SIZE) with user values.
    import_config(globals(), alg_config)

    model_info = model_info["actor"]
    super(DQN, self).__init__(
        alg_name="dqn", model_info=model_info, alg_config=alg_config)

    # Target network used to compute stable TD targets.
    self.target_actor = model_builder(model_info)
    self.buff = ReplayBuffer(BUFFER_SIZE)
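# import_config is a framework helper whose definition is not shown here. Below
# is a minimal sketch of the behavior this constructor relies on, assuming it
# copies user-config values over same-named module-level defaults such as
# BUFFER_SIZE, BATCH_SIZE, TARGET_UPDATE_FREQ and GAMMA. The name
# import_config_sketch is hypothetical, for illustration only.
def import_config_sketch(module_globals, config):
    """Override upper-cased module defaults with user-supplied values."""
    if not config:
        return
    for key, value in config.items():
        # Only replace names that already exist as defaults in the module.
        if key.upper() in module_globals:
            module_globals[key.upper()] = value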