Пример #1
0
    def test_create_file(self):
        """Create the test file through ``helper.create_file`` and check the path in ``cwd`` exists."""
        cls = self.__class__

        # Start from a clean slate in case an earlier run left the file behind.
        helper.remove_file(cls.created_file)
        helper.create_file(BASE_DIR, cls.filename)

        # NOTE(review): the check targets ``cwd`` rather than ``created_file``;
        # confirm that is the path the test really means to verify.
        target_exists = os.path.exists(cls.cwd)
        self.assertTrue(target_exists)
Пример #2
0
 def test_silentmove(self):
     """Check that ``created_file`` has size 0 after ``helper.silentremove``.

     The file is created, the text "hello" is appended to it, and then
     ``helper.silentremove`` is called for the same base directory and file
     name. ``os.stat`` raises if the path is missing, so this test presumably
     expects ``silentremove`` to leave an empty file behind rather than
     delete it -- verify against the helper's implementation.
     """
     cls = self.__class__
     helper.create_file(BASE_DIR, cls.filename)
     helper.append_to_file("hello", cls.created_file)
     helper.silentremove(BASE_DIR, cls.filename)
     # assertEqual reports the actual size on failure, unlike
     # assertTrue(size == 0) which only says "False is not true".
     self.assertEqual(os.stat(cls.created_file).st_size, 0)
Пример #3
0
def k_learning(env,
               num_episodes,
               h,
               goal,
               epsilon=0.1,
               record_prefix=None,
               is_link=False):
    """Run the plan / act / re-learn loop for ``num_episodes`` episodes.

    Each episode resets the environment and then, until ``cf.TIME_RANGE``
    time steps have elapsed or the environment reports ``done``:

    1. asks clingo for a plan from the agent's current position,
    2. executes the plan one action at a time, replacing an action by
       ``randint(0, 3)`` whenever a uniform draw falls below ``epsilon``
       (after such a random action the rest of the plan is dropped and a
       new plan is requested),
    3. re-runs ILASP to replace the hypothesis whenever
       ``induction.check_ILASP_cover`` rejects either example generated
       from the observed transition.

    After every episode ``run_experiment`` is called with ``stats_test``.

    Args:
        env: Environment providing ``reset``, ``step``, ``render``,
            ``unwrapped.game.height`` / ``width`` and
            ``unwrapped.observer.get_observation()["position"]``.
        num_episodes: Number of episodes to run.
        h: Initial hypothesis, converted with ``py_asp.convert_las_asp``
            before it is handed to the ASP side.
        goal: Goal state passed to ``abduction.add_goal_state``.
        epsilon: Probability threshold for taking a random action instead
            of the planned one.
        record_prefix: When truthy, a log directory is generated under
            ``<cf.BASE_DIR>/log`` and clingo / ILASP runs are logged there.
        is_link: Forwarded unchanged to ``induction.copy_las_base``.

    Returns:
        Tuple ``(stats, stats_test, stats_ilasp)``: per-episode training
        statistics, the statistics object handed to ``run_experiment``
        (presumably filled there -- confirm), and the ILASP runtimes
        indexed by ``[episode, time step]``.
    """
    # Get cell range for the game
    height = env.unwrapped.game.height
    width = env.unwrapped.game.width
    cell_range = "\ncell((0..{}, 0..{})).\n".format(width - 1, height - 1)

    # Log everything and keep the record here
    log_dir = None
    if record_prefix:
        log_dir = os.path.join(cf.BASE_DIR, "log")
        log_dir = helper.gen_log_dir(log_dir, record_prefix)

    # Set to True once the first abduction has written the hypothesis,
    # start state and goal state; that setup happens only once per call.
    first_abduction = False

    # Last non-None link returned by induction.generate_pos
    keep_link = None

    # Clean up all the files first
    helper.silentremove(cf.BASE_DIR, cf.GROUNDING)
    helper.silentremove(cf.BASE_DIR, cf.LASFILE)
    helper.silentremove(cf.BASE_DIR, cf.CLINGOFILE)
    helper.silentremove(cf.BASE_DIR, cf.LAS_CACHE, cf.LAS_CACHE_PATH)
    helper.create_file(cf.BASE_DIR, cf.LAS_CACHE, cf.LAS_CACHE_PATH)
    cf.ALREADY_LINK = False
    # Copy pos examples that were used in TL before
    tl_file = os.path.join(cf.BASE_DIR, "tl_pos.las")
    helper.copy_file(tl_file, cf.LASFILE)
    # Add mode bias and adjacent definition for ILASP
    induction.copy_las_base(height, width, cf.LASFILE, is_link)

    # record the current hypothesis
    hypothesis = h
    abduction.make_lp_base(cell_range)

    wall_list = induction.get_all_walls(env)

    stats = plotting.EpisodeStats(episode_lengths=np.zeros(num_episodes),
                                  episode_rewards=np.zeros(num_episodes),
                                  episode_runtime=np.zeros(num_episodes))

    stats_ilasp = plotting.TimeStats(ILASP_runtime=np.zeros((num_episodes,
                                                             cf.TIME_RANGE)))

    stats_test = plotting.EpisodeStats(episode_lengths=np.zeros(num_episodes),
                                       episode_rewards=np.zeros(num_episodes),
                                       episode_runtime=np.zeros(num_episodes))

    for i_episode in range(num_episodes):
        print("==============NEW EPISODE======================")
        print("i_episode ", i_episode)
        start_total_runtime = time.time()

        previous_state = env.reset()
        agent_position = env.unwrapped.observer.get_observation()["position"]
        env.render()
        # Not read again in this function; the call is kept as-is.
        previous_state_at = py_asp.state_at(previous_state[0],
                                            previous_state[1], 0)

        t = 0
        # Reset per episode: without this, an empty plan before the first
        # step would hit an unbound `done` (first episode) or reuse the
        # previous episode's value and end the new episode immediately.
        done = False
        # Once the agent reaches the goal, the algorithm kicks in
        # Decaying epsilon greedy params
        # new_epsilon = epsilon*(1/(i_episode+1)**cf.DECAY_PARAM)
        new_epsilon = epsilon
        print("new_epsilon ", new_epsilon)

        while t < cf.TIME_RANGE:
            if not first_abduction:
                # Convert syntax of H for ASP solver
                hypothesis_asp = py_asp.convert_las_asp(hypothesis)
                abduction.add_hypothesis(hypothesis_asp)
                abduction.add_start_state(agent_position)
                abduction.add_goal_state(goal)
                first_abduction = True

            # Update the starting position for Clingo
            agent_position = env.unwrapped.observer.get_observation(
            )["position"]
            abduction.update_agent_position(agent_position, t)
            abduction.update_time_range(agent_position, t)

            # Run clingo to get a plan
            answer_sets = abduction.run_clingo(cf.CLINGOFILE)
            states_plan, actions_array = abduction.sort_planning(answer_sets)

            # Record clingo
            if record_prefix:
                inputfile = os.path.join(cf.BASE_DIR, cf.CLINGOFILE)
                helper.log_asp(inputfile, answer_sets, log_dir, i_episode, t)

            # Execute the planning
            for action_index, action in enumerate(actions_array):
                print("---------Planning phase---------------------")

                # Flip a coin. If threshold < epsilon, explore randomly
                threshold = random.uniform(0, 1)
                if threshold < new_epsilon:
                    action_int = randint(0, 3)
                    if cf.IS_PRINT:
                        print("Taking a pure random action...",
                              helper.convert_action(action_int))
                else:
                    # Following the plan
                    action_int = helper.get_action(action[1])
                    if cf.IS_PRINT:
                        print("Following the plan...",
                              helper.convert_action(action_int))
                action_string = helper.convert_action(action_int)
                next_state, reward, done, _ = env.step(action_int)
                next_state_at = py_asp.state_at(next_state[0], next_state[1],
                                                t + 1)

                # Reward shaping: +10 on the terminating step, -1 otherwise
                if done:
                    reward = reward + 10
                else:
                    reward = reward - 1

                # Meanwhile, accumulate all background knowledge
                abduction.add_new_walls(previous_state, wall_list,
                                        cf.CLINGOFILE)

                # Make ASP syntax of state transition

                pos1, pos2, link = induction.generate_pos(
                    hypothesis, previous_state, next_state, action_string,
                    wall_list, cell_range)

                if link is not None:
                    keep_link = link
                # Update H if necessary
                if (not induction.check_ILASP_cover(
                        hypothesis, pos1, height, width,
                        keep_link)) or (not induction.check_ILASP_cover(
                            hypothesis, pos2, height, width, keep_link)):
                    start_time = time.time()
                    hypothesis = induction.run_ILASP(cf.LASFILE, cf.CACHE_DIR)
                    ilasp_runtime = (time.time() - start_time)
                    # NOTE(review): t is only checked against cf.TIME_RANGE
                    # before planning; this index assumes the plan never
                    # runs past the time range -- confirm.
                    stats_ilasp.ILASP_runtime[i_episode, t] += ilasp_runtime
                    # Convert syntax of H for ASP solver
                    hypothesis_asp = py_asp.convert_las_asp(hypothesis)
                    abduction.update_h(hypothesis_asp)
                    if record_prefix:
                        inputfile = os.path.join(cf.BASE_DIR, cf.LASFILE)
                        helper.log_las(inputfile, hypothesis, log_dir,
                                       i_episode, t)

                previous_state = next_state
                previous_state_at = next_state_at

                # Update stats
                stats.episode_rewards[i_episode] += reward
                # NOTE(review): this stores the index within the current
                # plan, which restarts at 0 after every re-plan; confirm
                # whether the episode-wide step count `t` was intended.
                stats.episode_lengths[i_episode] = action_index

                env.render()
                # time.sleep(0.1)
                t = t + 1

                # A random action invalidates the rest of the plan, so
                # leave the plan loop and ask clingo for a new one.
                if done or (threshold < new_epsilon):
                    break

            # No plan was produced: still advance time so the loop ends.
            if not actions_array:
                t = t + 1

            if done:
                break

        stats.episode_runtime[i_episode] += (time.time() - start_total_runtime)
        run_experiment(env, i_episode, stats_test, width, cf.TIME_RANGE)

    return stats, stats_test, stats_ilasp