class BasicHouseTestCase(unittest.TestCase):
    """Testing house usage and methods"""
    def setUp(self):
        self.house = House(5)
        self.house.day_start = 7 * 60
        self.house.day_end = 24 * 60 - 5 * 60

    def test_get_inside_params(self):
        """
        Tests if returned dictionary is OrderedDict
        and if any inside dicitonaries are OrderedDict
        """
        inside_params = self.house.get_inside_params()

        is_ordered_dict = type(inside_params) is OrderedDict

        self.assertTrue(is_ordered_dict, "Returned dictionary has to be of "
                        "OrderedDict type")

        for param in [d for d in inside_params if isinstance(d, dict)]:
            is_ordered_dict = type(param) is OrderedDict
            self.assertTrue(
                is_ordered_dict, "Inside dictionary has to be of "
                "OrderedDict type")
# --- Example 2 ---
class HouseEnergyEnvironment:
    """Endpoints / facade for the RL environment.

    This is where we gather together our World, OutsideSensors, House,
    etc., connect them to one another in the proper way, and generally
    set up a working RL environment.

    """
    def __init__(self, world=None, collect_stats=False):
        """Declare every field of the environment.

        Actual initialization happens in reset(), so the whole
        environment can be re-initialized quickly between episodes.

        The stats-related counters record how many timesteps a given
        parameter stayed close to its desired value within a particular
        interval in the current episode (see _update_stats method).

        Args:
            world(World): (optional) a World object to be used in environment
            collect_stats(boolean): (optional) enable statistics collection

        """
        # When running from inside a tests directory the configuration
        # file sits one level further up.
        add_path = '../' if 'tests' in os.getcwd() else ''
        with open(add_path + '../configuration.json') as config_file:
            self.config = json.load(config_file)['env']

        self.world = None
        self.outside_sensors = None
        self.house = None

        self.last_reward = 0

        self.collect_stats = collect_stats

        # per-episode comfort statistics (plain timestep counts)
        self.timesteps = 0
        self.temp_diff_ok_count = 0
        self.temp_diff_perfect_count = 0
        self.light_diff_ok_count = 0
        self.light_diff_perfect_count = 0

        self.reset(world)

    def step(self, action_name):
        """Advance the environment by one timestep.

        This is the main communication point between the agent and the
        environment: it executes the requested house action, moves the
        world forward, optionally updates the statistics, and produces
        the observation / reward pair.

        Args:
            action_name(string): a name of action. For possible action names
                                 check get_actions() method
        Returns:
            observation(dict): serialized information about the environment
            reward(float): a reward for RL agent's last action
            done(boolean): information whether the new state, achieved after
                           this update, is terminal (episode end)
        """
        # dispatch the action on the house by method name
        getattr(self.house, action_name)()
        done = self.world.step()

        state = self.get_current_state()
        if self.collect_stats:
            self._update_stats(state)

        self.last_reward = self.house.reward()
        return self.serialize_state(state), self.last_reward, done

    def reset(self, world=None):
        """(Re)initialize the environment and register the listeners.

        Should be used to start a new episode. Builds a fresh world
        (unless one is supplied), house and outside sensors, wires the
        sensors into the world as listeners, and pushes the initial
        readings out to them.

        Args:
            world(World): (optional) a World object to run the episode on

        Returns:
            Serialized initial state of the environment
        """
        self.world = world or World()

        # wipe the per-episode comfort statistics
        self.timesteps = 0
        self.temp_diff_ok_count = self.temp_diff_perfect_count = 0
        self.light_diff_ok_count = self.light_diff_perfect_count = 0

        self.house = House(self.world.time_step_in_minutes)
        self.outside_sensors = [OutsideSensor(self.house)]

        # register listeners:
        for sensor in self.outside_sensors:
            self.world.register(sensor)

        # transfer initial information to listeners
        self.world.update_listeners()

        return self.serialize_state(self.get_current_state())

    def get_actions(self):
        """Return the names of all available action methods on the house.

        Returns:
            actions (list of strings): A list of action-method names

        Example:
            H = HouseEnergyEnvironment()
            actions = H.get_actions()

            # to make an action use pass its name to the step method, f.e.:
            H.step(actions[-1])

        """
        actions = []
        for name in dir(self.house):
            attribute = getattr(self.house, name)
            # an action is any callable whose name starts with "action"
            if callable(attribute) and name.startswith('action'):
                actions.append(name)
        return actions

    def get_current_state(self):
        """Collect an OrderedDict of unnormalized environment state values.

        Use this method to gather human-readable information about the state.
        It should not feed the RL agent directly, as the values are not
        normalized; the step method normalizes them via serialize_state.

        Returns(OrderedDict):
            Current outside and inside sensor values, user
            desired values, action-controllable settings of devices,
            daytime and reward for the last timeframe.
        """
        current_state = OrderedDict()

        # outside sensor readings come first
        for sensor in self.outside_sensors:
            current_state.update(sensor.get_info())

        # flatten the nested house parameters into a single dict
        for key, value in self.house.get_inside_params().items():
            if key == 'inside_sensors':
                # one entry per (sensor, measurement) pair, dot-separated
                for sensor_name, measurements in value.items():
                    for name, reading in measurements.items():
                        current_state['.'.join((sensor_name, name))] = reading
            elif key in ('desired', 'devices_settings'):
                current_state.update(value)
            else:
                current_state[key] = value

        current_state['Reward'] = self.last_reward
        return current_state

    @staticmethod
    def serialize_state(state):
        """Returns 1-dim ndarray of normalized state parameters from dict

        Args:
            state(OrderedDict) - the exact product of _get_current_state method.

        Note: Method assumes all temperature indicators are from range
        (-20, 40) + every temperature indicator contains 'temp' in the key name
        - and this is a project global assumption.

        Notice that this method gets deletes the daytime entry and any constant
        values.

        Returns(ndarray):
            Normalized, 'neural-net ready' state 1-dim array.

        Current array structure:
        [0] Outside Temperature
        [1] Outside Light
        [2] Clouds
        [.] Rain
        [.] Wind
        [.] temperature
        [ ] temperature_delta
        [ ] light
        [ ] temp_desired
        [ ] light_desired
        [ ] grid_cost
        [ ] energy_src
        [ ] cooling_lvl
        [ ] heating_lvl
        [ ] light_lvl
        [ ] curtains_lvl
        [ ] battery_level
        [ ] battery_delta
        """

        del state['Daytime']
        del state['Reward']

        for key, value in state.items():
            if re.match('.*temp.*', key, re.IGNORECASE):
                state[key] = (value + 20) / 60

        state['energy_src'] = 1 if state['energy_src'] is 'grid' else 0
        state['battery_level'] /= state['battery_max']
        del state['battery_max']
        state['battery_delta'] /= 10

        return np.array(list(state.values()))

    def get_episode_stats(self):
        """Provide statistic comfort-related info about the episode.

        Returns:
            dictionary with current statistics expressed in percent of
            current episode time.

        Returns the correct values only if the environment works in the
        collect_stats mode and there was at least one step taken; returns
        None if not. The stats functionality works only for the one-sensor
        version of the environment.
        """
        # guard: stats are meaningless without collection or steps
        if not self.collect_stats or self.timesteps == 0:
            return None

        def as_percent(count):
            # share of the episode during which the condition held
            return 100 * count / self.timesteps

        thresholds = self.config['stats']
        return {
            'Temperature difference < {}'.format(thresholds["temp_ok_diff"]):
                as_percent(self.temp_diff_ok_count),
            'Temperature difference < {}'.format(
                thresholds["temp_perfect_diff"]):
                as_percent(self.temp_diff_perfect_count),
            'Light difference < {}'.format(thresholds["light_ok_diff"]):
                as_percent(self.light_diff_ok_count),
            'Light difference < {}'.format(thresholds["light_perfect_diff"]):
                as_percent(self.light_diff_perfect_count),
        }

    def _update_stats(self, state):
        """Update the counters of fulfilling the desired comfort values.

        Compares the absolute difference between the current and the
        desired value against the "ok" and "perfect" thresholds from the
        global configuration; each threshold met bumps its counter.
        Current values are taken from the first, main sensor in the house.
        The counters are raw counts — get_episode_stats converts them to
        episode percentages.

        Args:
            state(OrderedDict): dictionary in format returned by
                                get_current_state() method
        """
        self.timesteps += 1
        thresholds = self.config['stats']

        temp_diff = abs(state['first.temperature'] - state['temp_desired'])
        light_diff = abs(state['first.light'] - state['light_desired'])

        if temp_diff < thresholds["temp_ok_diff"]:
            self.temp_diff_ok_count += 1
            if temp_diff < thresholds["temp_perfect_diff"]:
                self.temp_diff_perfect_count += 1

        if light_diff < thresholds["light_ok_diff"]:
            self.light_diff_ok_count += 1
            if light_diff < thresholds["light_perfect_diff"]:
                self.light_diff_perfect_count += 1