Пример #1
0
  def __init__(self, rng, id_num, arr, num_moves, args):
    """Wire up this worker's environment and agent, then start training.

    Every constructor argument except ``arr`` is stored on the instance.
    An ``ALE_env`` is built from ``args`` and ``rng``, and an
    ``AOCAgent_THEANO`` is built for that environment's action space.
    The last statement calls ``self.train()``, so constructing the object
    runs the training loop before the constructor returns.
    """
    self.rng, self.args = rng, args
    self.id_num, self.num_moves = id_num, num_moves

    environment = ALE_env(args, rng=rng)
    self.env = environment
    self.agent = AOCAgent_THEANO(
        environment.action_space, id_num, arr, num_moves, args)

    self.train()
Пример #2
0
class Training():
  """One training worker: an environment/agent pair driven until the move budget is spent.

  Constructing an instance builds the environment and the agent and then
  immediately runs ``train()``; the constructor only returns once the loop
  has finished.
  """

  # Checkpoint after every N-th finished game. 1 keeps the original
  # behaviour of saving after each game.
  save_every_n_games = 1

  def __init__(self, rng, id_num, arr, num_moves, args):
    """Store the arguments, build ``self.env`` / ``self.agent`` and call ``train()``.

    Args:
      rng: forwarded to ``ALE_env`` as its ``rng`` keyword.
      id_num: worker id; only the worker whose id is 1 writes checkpoints.
      arr: forwarded untouched to ``AOCAgent_THEANO``.
      num_moves: object whose ``value`` attribute is compared against
        ``args.max_num_frames``. This class never increments it --
        presumably the agent does; confirm against ``AOCAgent_THEANO``.
      args: settings object; this class reads ``max_num_frames``,
        ``frame_skip``, ``num_threads``, ``testing`` and (when present)
        ``folder_name``.
    """
    self.args = args
    self.rng = rng
    self.num_moves = num_moves
    self.id_num = id_num

    self.env = ALE_env(args, rng=rng)
    self.agent = AOCAgent_THEANO(self.env.action_space, id_num, arr, num_moves, args)

    self.train()

  def _checkpoint_dir(self):
    """Return the folder handed to ``agent.save_values``."""
    target = getattr(self.args, "folder_name", None)
    if target is None:
      # Legacy path: the loop used to read a module-level ``folder_name``.
      target = folder_name
    return target

  def _report_episode(self, total_reward, started_at, recent_fps):
    """Print the per-episode progress line and return the updated fps window."""
    secs = round(time.time() - started_at, 1)
    frames = self.env.get_frame_count()
    # ``secs`` is rounded to 0.1 s, so a very short episode gives 0.0;
    # use the rounding step as a floor instead of dividing by zero.
    fps = int(frames / max(secs, 0.1))
    recent_fps = recent_fps[-9:] + [fps]  # keep the last ten readings
    mean_fps = np.mean(recent_fps)
    moves = self.num_moves.value
    if mean_fps > 0:
      eta = ((self.args.max_num_frames - moves) * self.args.frame_skip /
             (self.args.num_threads * mean_fps))
    else:
      # No throughput measured yet: report a zero ETA rather than
      # producing an infinity that int() cannot convert.
      eta = 0
    print("id: %d\treward: %d\ttime: %.1f\tframes: %d\t %dfps  \tmoves: %d \t ETA: %dh %dm %ds  \t%.2f%%" % (
        self.id_num, total_reward, secs, frames, fps, moves,
        int(eta / 3600), int(eta / 60) % 60, int(eta % 60),
        float(moves) / self.args.max_num_frames * 100))
    return recent_fps

  def train(self):
    """Play episodes until ``num_moves.value`` reaches ``args.max_num_frames``.

    Each step asks the agent for an action, applies it to the environment
    and hands the transition to ``agent.store``. When an episode ends, a
    progress line is printed, worker 1 saves the agent (unless
    ``args.testing`` is set) and both environment and agent are reset.
    With ``args.testing`` set, the environment is rendered after every step.
    """
    total_reward = 0
    total_games = 0
    recent_fps = []
    x = self.env.reset()
    self.agent.reset(x)
    timer = time.time()
    done = False

    while self.num_moves.value < self.args.max_num_frames:
      if done:
        total_games += 1
        recent_fps = self._report_episode(total_reward, timer, recent_fps)
        # Restart the clock before saving, as the original loop did, so
        # checkpoint time is charged to the next episode.
        timer = time.time()

        if (total_games % self.save_every_n_games == 0
            and self.id_num == 1 and not self.args.testing):
          self.agent.save_values(self._checkpoint_dir())
          print("saved model")
        total_reward = 0
        x = self.env.reset()
        self.agent.reset(x)
        done = False

      action = self.agent.get_action(x)
      new_x, reward, done, death = self.env.act(action)
      self.agent.store(x, new_x, action, reward, done, death)
      if self.args.testing:
        self.env.render()
      total_reward += reward
      x = np.copy(new_x)
Пример #3
0
        if last.split(",")[0].isdigit():
          init_num_moves = int(last.split(",")[0])
    init_weights = pickle.load(open(folder_name+"/model.pkl", "rb"))
    is_testing = copy.deepcopy(params.testing)
    params = pickle.load(open(params.load_folder+"/params.pkl", "rb"))
    params.testing = is_testing
    if is_testing:
      params.num_threads = 1
  else:
    folder_name = foldercreation(folder_name, params)
    pickle.dump(params, open(folder_name+"/params.pkl", "wb"))

  setattr(params, "folder_name", folder_name)

  setattr(params, "init_num_moves", init_num_moves)
  print "init_num_moves:", init_num_moves

  f = lambda rng, i, shared_arr, num_moves, args: Training(rng, i, shared_arr, num_moves, args)

  env = ALE_env(params)
  if init_num_moves == 0:
    init_weights = (AOCAgent_THEANO(env.action_space, 0, args=params)).get_param_vals()
    
  num_moves = Value("i", init_num_moves, lock=False)
  arr = [Array('f', m.flatten(), lock=False) for m in init_weights]
  seed = np.random.randint(10000)
  for i in range(params.num_threads):
    Process(target=f, args=(np.random.RandomState(seed+i), i+1, arr, num_moves, params)).start()


Пример #4
0
class Training():
    """One worker: an environment/agent pair used for training or k-game evaluation.

    Constructing an instance builds the environment and the agent and then
    immediately runs ``train()``; the constructor only returns once the loop
    has finished or the process has exited.
    """

    # Checkpoint after every N-th finished game. 1 keeps the original
    # behaviour of saving after each game.
    save_every_n_games = 1

    def __init__(self, rng, id_num, arr, num_moves, args):
        """Store the arguments, build ``self.env`` / ``self.agent`` and call ``train()``.

        Args:
            rng: forwarded to ``ALE_env`` as its ``rng`` keyword.
            id_num: worker id; only the worker whose id is 1 writes
                checkpoints.
            arr: forwarded untouched to ``AOCAgent_THEANO``.
            num_moves: object whose ``value`` attribute is compared against
                ``args.max_num_frames``. This class never increments it --
                presumably the agent does; confirm against
                ``AOCAgent_THEANO``.
            args: settings object; this class reads ``max_num_frames``,
                ``frame_skip``, ``num_threads``, ``testing``, ``kgames``,
                ``load_folder`` and (when present) ``folder_name``.
        """
        self.args = args
        self.rng = rng
        self.num_moves = num_moves
        self.id_num = id_num
        self.env = ALE_env(args, rng=rng)
        self.agent = AOCAgent_THEANO(self.env.action_space, id_num, arr,
                                     num_moves, args)

        self.train()

    def _checkpoint_dir(self):
        """Return the folder handed to ``agent.save_values``."""
        target = getattr(self.args, "folder_name", None)
        if target is None:
            # Legacy path: the loop used to read a module-level
            # ``folder_name``.
            target = folder_name
        return target

    def _report_episode(self, total_reward, started_at, recent_fps):
        """Print the per-episode progress line and return the updated fps window."""
        secs = round(time.time() - started_at, 1)
        frames = self.env.get_frame_count()
        # ``secs`` is rounded to 0.1 s, so a very short episode gives 0.0;
        # use the rounding step as a floor instead of dividing by zero.
        fps = int(frames / max(secs, 0.1))
        recent_fps = recent_fps[-9:] + [fps]  # keep the last ten readings
        mean_fps = np.mean(recent_fps)
        moves = self.num_moves.value
        if mean_fps > 0:
            eta = ((self.args.max_num_frames - moves) *
                   self.args.frame_skip /
                   (self.args.num_threads * mean_fps))
        else:
            # No throughput measured yet: report a zero ETA rather than
            # producing an infinity that int() cannot convert.
            eta = 0
        print("id: %d\treward: %d\ttime: %.1f\tframes: %d\t %dfps  \tmoves: %d \t ETA: %dh %dm %ds  \t%.2f%%" % (
            self.id_num, total_reward, secs, frames, fps, moves,
            int(eta / 3600), int(eta / 60) % 60, int(eta % 60),
            float(moves) / self.args.max_num_frames * 100))
        return recent_fps

    def _record_test_game(self, total_games, total_reward, reward_k_games):
        """Collect one evaluation reward; once past ``kgames``, write results and exit.

        The first ``args.kgames`` finished games append their reward to
        ``reward_k_games`` (mutated in place). The next finished game
        triggers the report: rewards go to ``test_result.npy``, mean and
        standard deviation go to ``test_result_stats.txt`` (both inside
        ``args.load_folder``), a summary is printed and the process exits
        with status 0. The process also exits with status 0 when
        ``args.load_folder`` is empty.

        NOTE(review): the report is written after game k+1, whose reward is
        not recorded; kept as in the original.
        """
        import sys  # local import: the file's import block is outside this class

        # Read the folder from this worker's own settings rather than from
        # a module-level ``params`` that the class is never given.
        result_dir = self.args.load_folder
        if result_dir == "":
            print("No load folders for testing !!!")
            sys.exit(0)

        k = self.args.kgames
        if total_games <= k:
            reward_k_games.append(total_reward)
            return

        # float64 matches what the original np.append accumulation produced.
        rewards = np.asarray(reward_k_games, dtype=np.float64)
        np.save(result_dir + "/test_result.npy",
                rewards)  # saved rewards for k games
        mean_reward = np.mean(rewards)
        std_reward = np.std(rewards)
        np.savetxt(result_dir + "/test_result_stats.txt",
                   np.asarray([mean_reward, std_reward]))
        # float() keeps this a true division on Python 2 even when every
        # reward is an int.
        avgreward_k_games = float(sum(reward_k_games)) / k
        print("----------------------------------------------")
        print("average reward for k games:  %s" % (avgreward_k_games,))
        print("Numpy average reward for k games:  %s" % (mean_reward,))
        print("std deviation for k games:  %s" % (std_reward,))
        sys.exit(0)

    def train(self):
        """Play episodes until ``num_moves.value`` reaches ``args.max_num_frames``.

        Each step asks the agent for an action, applies it to the
        environment and hands the transition to ``agent.store``. When an
        episode ends, a progress line is printed and environment and agent
        are reset. In training mode worker 1 also saves the agent; in
        testing mode (``args.testing``) the environment is rendered after
        every step and finished games feed the k-game evaluation, which
        ends the process once it has reported.
        """
        total_reward = 0
        total_games = 0
        recent_fps = []
        reward_k_games = []
        x = self.env.reset()  # returns the current x
        self.agent.reset(x)
        timer = time.time()
        done = False

        while self.num_moves.value < self.args.max_num_frames:
            if done:
                total_games += 1
                recent_fps = self._report_episode(total_reward, timer,
                                                  recent_fps)
                if self.args.testing:
                    self._record_test_game(total_games, total_reward,
                                           reward_k_games)
                # Restart the clock before saving, as the original loop
                # did, so checkpoint time is charged to the next episode.
                timer = time.time()

                if (total_games % self.save_every_n_games == 0
                        and self.id_num == 1 and not self.args.testing):
                    self.agent.save_values(self._checkpoint_dir())
                    print("saved model")
                total_reward = 0
                x = self.env.reset()
                self.agent.reset(x)
                done = False

            action = self.agent.get_action(x)
            new_x, reward, done, death = self.env.act(action)
            self.agent.store(x, new_x, action, reward, done, death)
            if self.args.testing:
                self.env.render()
            total_reward += reward
            x = np.copy(new_x)