import os
import random


def get_batches(self, batch_size, current_epoch, random_sample=True, is_training=True):
    # Each epoch reads its own data shard (directories named 000, 001, ...).
    dataset_path = os.path.join(self.dataset_path, '%03d' % current_epoch)
    data = self.read_data(dataset_path)
    for i in range(self.iterations_per_epoch):
        batch = recDotDefaultDict()
        batch.is_training = is_training
        if random_sample:
            replays = random.sample(data, batch_size)
        else:
            # Sequential slice so `replays` is always defined when sampling is off.
            replays = data[i * batch_size:(i + 1) * batch_size]
        # Merge the selected examples into one batch
        # (list of dictionaries to dictionary of lists).
        for x in replays:
            batching_dicts(batch, x)
        yield batch
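

# --- Hedged sketch of the shared helpers ---
# recDotDefaultDict and batching_dicts are used throughout this file; the
# definitions below are a minimal illustration of the behavior implied by the
# inline comments (a nested, attribute-accessible defaultdict and a recursive
# merge of a list of dictionaries into a dictionary of lists), not the
# project's actual implementation.
from collections import defaultdict


class recDotDefaultDict(defaultdict):
    """Nested defaultdict that also allows attribute access (batch.is_training)."""

    def __init__(self, *args, **kwargs):
        super(recDotDefaultDict, self).__init__(recDotDefaultDict, *args, **kwargs)

    __getattr__ = defaultdict.__getitem__
    __setattr__ = defaultdict.__setitem__


def batching_dicts(batch, example):
    """Recursively append every leaf value of `example` to a list in `batch`."""
    for key, value in example.items():
        if isinstance(value, dict):
            batching_dicts(batch[key], value)
        else:
            if not isinstance(batch[key], list):
                batch[key] = []
            batch[key].append(value)
    return batch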


def create_test_batch(roles, picks, bans, vocab):
    batch = recDotDefaultDict()
    example = create_example(roles, picks, bans, vocab)
    # list of dictionaries to dictionary of lists.
    batch = batching_dicts(batch, example)
    batch = padding(batch)
    return batch


def tensorize(self, data):
    batch = recDotDefaultDict()
    for d in data:
        # list of dictionaries to dictionary of lists.
        batch = batching_dicts(batch, d)
    batch = self.padding(batch)
    return batch
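

# --- Hedged sketch of the padding step ---
# `padding` / `self.padding` above is assumed to equalize variable-length
# sequences inside the batched dict, e.g. by right-padding with zeros; the
# project's version may also record sequence lengths or convert lists to
# arrays. `padding_sketch` is a hypothetical name for this illustration.
def padding_sketch(batch, pad_value=0):
    for key, value in batch.items():
        if isinstance(value, dict):
            # Recurse into nested batch entries.
            padding_sketch(value, pad_value)
        elif isinstance(value, list) and value and all(isinstance(v, list) for v in value):
            # Right-pad each sequence in this field to the longest one.
            max_len = max(len(v) for v in value)
            batch[key] = [v + [pad_value] * (max_len - len(v)) for v in value]
    return batch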


def add_replay(self, fpath):
    log = self.read_log(fpath)
    if not log:
        return
    fkey, p1log, p2log = log

    # Collect each player's per-turn dicts into dictionaries of lists.
    p1log_tensors = recDotDefaultDict()
    for d in p1log:
        batching_dicts(p1log_tensors, d)
    p2log_tensors = recDotDefaultDict()
    for d in p2log:
        batching_dicts(p2log_tensors, d)

    # Propagate rewards from the last state for N-step TD.
    # T = len(p1log)
    # for t in range(T):
    #     p1log[t].reward = (self.td_gamma ** (T - t - 1)) * p1log[-1].reward
    #     p2log[t].reward = (self.td_gamma ** (T - t - 1)) * p2log[-1].reward

    data = [p1log_tensors, p2log_tensors]
    return data
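

# --- Hedged usage sketch ---
# One way the pieces above could fit together, assuming add_replay and
# tensorize live on the same dataset/buffer object (the actual class layout
# may differ); 'replays/example.log' is a hypothetical path.
#
#     data = self.add_replay('replays/example.log')
#     if data:
#         batch = self.tensorize(data)  # two per-player dicts -> padded batch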