Example #1
 def get_batches(self, batch_size, current_epoch, 
                 random_sample=True, is_training=True):
   dataset_path = os.path.join(self.dataset_path, '%03d' % current_epoch)
   data = self.read_data(dataset_path)
   for i in range(self.iterations_per_epoch):
     batch = recDotDefaultDict()
     batch.is_training = is_training 
     if random_sample:
       replays = random.sample(data, batch_size)
       # Merge the sampled replays: list of dictionaries to dictionary of lists.
       for x in replays:
         batching_dicts(batch, x)
       yield batch
Example #2
def create_test_batch(roles, picks, bans, vocab):
    batch = recDotDefaultDict()
    example = create_example(roles, picks, bans, vocab)
    batch = batching_dicts(
        batch, example)  # list of dictionaries to dictionary of lists.
    batch = padding(batch)
    return batch
Example #3
 def tensorize(self, data):
     batch = recDotDefaultDict()
     for d in data:
         batch = batching_dicts(
             batch, d)  # list of dictionaries to dictionary of lists.
     batch = self.padding(batch)
     return batch
Example #4
  def add_replay(self, fpath):
    log = self.read_log(fpath)
    if not log:
      return 
    fkey, p1log, p2log = log
    # Fold each player's log (a list of dictionaries) into a dictionary of lists.
    p1log_tensors = recDotDefaultDict()
    for d in p1log:
      batching_dicts(p1log_tensors, d)

    p2log_tensors = recDotDefaultDict()
    for d in p2log:
      batching_dicts(p2log_tensors, d)
    # Propagate rewards from the last state for N-step TD.
    # T = len(p1log)
    # for t in range(T):
    #   p1log[t].reward = (self.td_gamma ** (T - t - 1)) * p1log[-1].reward
    #   p2log[t].reward = (self.td_gamma ** (T - t - 1)) * p2log[-1].reward
    data = [p1log_tensors, p2log_tensors]
    return data
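All four examples rely on the same two helpers. The following is a minimal sketch of what they might look like, inferred only from the usage above and the inline comment "list of dictionaries to dictionary of lists."; it is an assumption, not the project's actual implementation.

from collections import defaultdict


class recDotDefaultDict(defaultdict):
  """Assumed behavior: a nested defaultdict with attribute-style access,
  e.g. `batch.is_training = True` as in Example #1."""

  def __init__(self):
    super().__init__(recDotDefaultDict)

  __getattr__ = defaultdict.__getitem__
  __setattr__ = defaultdict.__setitem__


def batching_dicts(batch, example):
  """Assumed behavior: fold one example (a possibly nested dict) into `batch`,
  turning a list of dictionaries into a dictionary of lists."""
  for key, value in example.items():
    if isinstance(value, dict):
      batching_dicts(batch[key], value)  # recurse into nested dictionaries
    else:
      if not isinstance(batch[key], list):
        batch[key] = []                  # first occurrence of this key: start a list
      batch[key].append(value)
  return batch


# Usage sketch (hypothetical values):
#   batch = recDotDefaultDict()
#   batching_dicts(batch, {'a': 1})
#   batching_dicts(batch, {'a': 2})
#   # batch['a'] is now [1, 2]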