def __init__(self, predictor_io_names, player, batch_size=32,
             memory_size=1e6, init_memory_size=50000, exploration=1,
             end_exploration=0.1, exploration_epoch_anneal=0.002,
             reward_clip=None, update_frequency=1, history_len=1):
    """Set up the experience-replay buffer and exploration schedule.

    :param predictor_io_names: input/output tensor names used by the
        up-to-date predictor network (called with a state, returns a
        distribution over actions).
    :param player: an `RLEnvironment` providing the action space.
    :param batch_size: number of transitions sampled per training batch.
    :param memory_size: maximum number of transitions kept in the replay
        memory (default ``1e6``; converted to int for the deque bound).
    :param init_memory_size: number of transitions to collect before
        training starts.
    :param exploration: initial epsilon for epsilon-greedy exploration.
    :param end_exploration: final epsilon after annealing.
    :param exploration_epoch_anneal: amount epsilon decreases per epoch.
    :param reward_clip: optional ``(min, max)`` clip applied to rewards —
        presumably; confirm against where it is consumed.
    :param update_frequency: number of new transitions to add to memory
        after sampling a batch of transitions for training.
    :param history_len: length of history frames to concat; initial
        frames are zero-filled.
    """
    init_memory_size = int(init_memory_size)

    # Explicit assignments instead of the original ``locals()`` loop:
    # clearer, greppable, and robust against future local variables
    # silently becoming attributes. Values are identical to before.
    self.predictor_io_names = predictor_io_names
    self.player = player
    self.batch_size = batch_size
    self.memory_size = memory_size          # kept as passed (may be float)
    self.init_memory_size = init_memory_size
    self.exploration = exploration
    self.end_exploration = end_exploration
    self.exploration_epoch_anneal = exploration_epoch_anneal
    self.reward_clip = reward_clip
    self.update_frequency = update_frequency
    self.history_len = history_len

    self.num_actions = player.get_action_space().num_actions()
    logger.info("Number of Legal actions: {}".format(self.num_actions))
    self.mem = deque(maxlen=int(memory_size))
    self.rng = get_rng(self)
    # Signals once the replay memory has been filled to init_memory_size.
    self._init_memory_flag = threading.Event()
    self._predictor_io_names = predictor_io_names
def __init__(self, config, rng=None, load=True, shuffle=False):
    """Build the dataset container for train/test/val splits.

    :param config: configuration object; must expose ``data_dir``,
        ``data_ext``, ``world_width``, ``colors`` and ``shapes``.
    :param rng: seed/generator forwarded to ``get_rng``.
    :param load: when True, read every split from disk and build the
        corresponding TF data pipelines immediately.
    :param shuffle: when True, shuffle the loaded data.
    """
    self.config = config
    self.rng = get_rng(rng)

    # Public and private per-split containers, all keyed by split name.
    self.inputs, self.outputs, self.codes, self.code_lengths = {}, {}, {}, {}
    self.input_strings, self.output_strings = {}, {}
    self.with_input_string = False
    self.iterator = {}
    self._inputs, self._outputs, self._codes, self._code_lengths = {}, {}, {}, {}
    self._input_strings, self._output_strings = {}, {}

    self.data_names = ['train', 'test', 'val']
    # One data file per split: <data_dir>/<split>.<data_ext>
    self.data_paths = {
        split: os.path.join(
            config.data_dir, '{}.{}'.format(split, config.data_ext))
        for split in self.data_names
    }

    if load:
        self.load_data()
        for split in self.data_names:
            self.build_tf_data(split)

    if shuffle:
        self.shuffle()

    self.generator = Generator(
        config.world_width, colors=config.colors, shapes=config.shapes)
def __init__(self, rng=None, min_int=0, max_int=19, debug=False, **kwargs):
    """Initialize the parser with an RNG and an integer value range.

    :param rng: seed/generator forwarded to ``get_rng``.
    :param min_int: smallest integer value the parser works with.
    :param max_int: largest integer value the parser works with.
    :param debug: enable debug behavior in the parser.
    :param kwargs: forwarded unchanged to the base-class constructor.
    """
    super(KarelParser, self).__init__(**kwargs)
    self.rng = get_rng(rng)
    self.min_int, self.max_int = min_int, max_int
    self.debug = debug
def reset_state(self):
    """Replace this instance's RNG with a fresh one from ``get_rng``.

    NOTE(review): presumably called to re-seed after forking into worker
    processes — confirm against the dataflow framework's contract.
    """
    self.rng = get_rng()
def __init__(self, datalist, is_train=True, shuffle=True):
    """Wrap a list of data items as a (optionally shuffled) dataflow.

    :param datalist: the underlying sequence of data items.
    :param is_train: whether this instance serves the training split.
    :param shuffle: whether iteration order should be shuffled.
    """
    self._datalist = datalist
    self._is_train = is_train
    self._shuffle = shuffle
    self.rng = get_rng()