class TDAgent(Agent): """ A generic temporal-difference (TD) agent with discrete actions. To create a new TD agent, override this class and implement the methods .Q(sensation,action=None) and .update_Q(sensation,action,delda,on_policy=True). Parameters: alpha -- The learning rate, default = 0.1 gamma -- The discount factor, default = 1.0 lambda_ -- The eligibility discount factor, default = 0.0. step_method -- The method for doing TD updates: 'sarsa' or 'q_learning'. default = 'sarsa' action_selection -- The action selection method, default 'epsilon_greedy'. To change action selection, set this to the name of the new method, e.g. 'softmax'. initial_epsilon -- The starting epsilon for epsilon_greedy selection. (default=0.1) min_epsilon -- The minimum (final) epsilon. (default = 0.0) epsilon_half_life -- The half-life for epsilon annealing. (default = 1) initial_temperature -- The starting temperature for softmax (Boltzman distribution) selection. (default = 1.0) min_temperature -- The min (final) temperature for softmax selection. (default = 0.01) temperature_half_life -- The temperature half-life for softmax selection (default = 1) actions -- The list of available actions - can be any Python object that is understood as an action by the environment """ alpha = Magnitude(default=0.1) gamma = Magnitude(default=1.0) lambda_ = Magnitude(default=0.0) step_method = Parameter(default="sarsa") action_selection = Parameter(default="epsilon_greedy") # epsilon-greedy selection parameters initial_epsilon = Magnitude(default=0.1) min_epsilon = Magnitude(default=0.0) epsilon_half_life = Number(default=1, bounds=(0,None)) # softmax selection parameters initial_temperature = Number(default=1.0, bounds=(0,None)) min_temperature = Number(default=0.01, bounds=(0,None)) temperature_half_life = Number(default=1, bounds=(0,None)) actions = Parameter(default=[]) prune_eligibility = Magnitude(default=0.001) replacing_traces = Parameter(default=True) history_log = Parameter(default=None) allow_learning = Parameter(default=True) def __init__(self,**args): from plastk.utils import LogFile super(TDAgent,self).__init__(**args) self.nopickle.append('policy_fn') self.policy_fn = getattr(self,self.action_selection) self.total_steps = 0 if isinstance(self.history_log,str): self._history_file = LogFile(self.history_log) elif isinstance(self.history_log,file) or isinstance(self.history_log,LogFile): self._history_file = self.history_log def unpickle(self): """ Called automatically when the agent is unpickled. Sets the action-selection function to its appropriate value. """ super(TDAgent,self).unpickle() self.policy_fn = getattr(self,self.action_selection) def __call__(self,sensation,reward=None): """ Do a step. Calls the function selected in self.step_method and returns the action. """ step_fn = getattr(self,self.step_method+'_step') action_index = step_fn(sensation,reward) if self.history_log: if reward is None: self._history_file.write('start\n') self._history_file.write(`sensation`+'\n') self._history_file.write(`reward`+'\n') if not is_terminal(sensation): self._history_file.write(`action_index`+'\n') return self.actions[action_index] def Q(self,sensation,action=None): """ Return Q(s,a). If action is None, return an array of Q-values for each action in self.actions with the given sensation. You must override this method to implement a TDAgent subclass. """ raise NYI def update_Q(self,sensation,action,delta,on_policy=True): """ Update Q(sensation,action) by delta. 
        on_policy indicates whether the step that produced the update
        was on- or off-policy.  Any eligibility trace updates should
        be done from within this method.

        You must override this method to implement a TDAgent
        subclass.
        """
        raise NYI

    def sarsa_step(self, sensation, reward=None):
        """
        Do a step using the SARSA update method.  Selects an action,
        computes the TD update, and calls self.update_Q.  Returns the
        agent's next action.
        """
        if reward is None:
            return self._start_episode(sensation)

        rho = self.rho(reward)
        next_action = self.policy(sensation)

        if is_terminal(sensation):
            value = 0
        else:
            value = self.Q(sensation, next_action)

        last_value = self.Q(self.last_sensation, self.last_action)
        delta = rho + (self.gamma * value - last_value)

        self.verbose("controller step = %d, rho = %.2f" % (self.total_steps, rho))
        self.verbose(("Q(t-1) = %.5f, Q(t) = %.5f, diff = %.5f, " +
                      "delta = %.5f, terminal? = %d")
                     % (last_value, value, value - last_value,
                        delta, is_terminal(sensation)))

        if self.allow_learning:
            self.update_Q(self.last_sensation, self.last_action, delta)

        self.last_sensation = sensation
        self.last_action = next_action

        if isinstance(reward, list):
            self.total_steps += len(reward)
        else:
            self.total_steps += 1

        return next_action

    def q_learning_step(self, sensation, reward=None):
        """
        Do a step using Watkins' Q(lambda) update method.  Selects an
        action, computes the TD update, and calls
        self._q_learning_training.  Returns the agent's next action.
        """
        if reward is None:
            return self._start_episode(sensation)

        if self.allow_learning:
            self._q_learning_training(self.last_sensation, self.last_action,
                                      reward, sensation)

        self.last_sensation = sensation
        self.last_action = self.policy(sensation)

        if isinstance(reward, list):
            self.total_steps += len(reward)
        else:
            self.total_steps += 1

        return self.last_action

    def _q_learning_training(self, sensation, action, reward, next_sensation):
        """
        Do a single Q(lambda) training step given (s,a,r,s').  Can be
        called from outside the q_learning_step method for off-policy
        training, experience replay, etc.
        """
        rho = self.rho(reward)
        last_Q = self.Q(sensation)
        last_value = last_Q[action]

        if is_terminal(next_sensation):
            value = 0
        else:
            value = max(self.Q(next_sensation))

        delta = rho + (self.gamma * value - last_value)

        self.verbose("r = %.5f, Q(t-1) = %.5f, Q(t) = %.5f, diff = %.5f, delta = %.5f, terminal? = %d"
                     % (rho, last_value, value, value - last_value,
                        delta, is_terminal(next_sensation)))

        self.update_Q(sensation, action, delta,
                      on_policy=(last_Q[action] == max(last_Q)))
        if delta:
            assert (self.Q(sensation, action) - last_value) / delta < 1.0

    def _start_episode(self, sensation):
        """
        Start a new episode.  Called from self.__call__ when the
        reward is None.
        """
        self.last_sensation = sensation
        self.last_action = self.policy(sensation)
        return self.last_action

    def policy(self, sensation):
        """
        Given a sensation, return an action.  Uses
        self.action_selection to get a distribution over the agent's
        actions, and self.applicable_actions to prevent selecting
        inapplicable actions.  Returns 0 if is_terminal(sensation).
        """
        if not is_terminal(sensation):
            actions = self.applicable_actions(sensation)
            return actions[weighted_sample(self.policy_fn(sensation, actions))]
        else:
            # In the terminal state, the action is irrelevant.
            return 0
    def epsilon_greedy(self, sensation, applicable_actions):
        """
        Given self.epsilon() and self.Q(), return a distribution over
        applicable_actions as an array where each element contains
        the probability mass for the corresponding action.  I.e. the
        action with the highest Q gets p = 1 - self.epsilon(), and
        the remaining epsilon mass is divided uniformly among the
        other actions.
        """
        Q = array([self.Q(sensation, action) for action in applicable_actions])

        # Simple epsilon-greedy policy: get a vector with a 1 where
        # each max element is, zero elsewhere.
        mask = (Q == mmax(Q))
        num_maxes = len(nonzero(mask))
        num_others = len(mask) - num_maxes

        if num_others == 0:
            return mask

        e0 = self.epsilon() / num_maxes
        e1 = self.epsilon() / num_others
        result = zeros(len(mask)) + 0.0
        putmask(result, mask, 1 - e0)
        putmask(result, mask == 0, e1)
        return result

    def softmax(self, sensation, applicable_actions):
        """
        Given self.temperature() and self.Q(), return a Boltzmann
        distribution over applicable_actions as an array where each
        element contains the probability mass for the corresponding
        action.
        """
        temp = self.temperature()
        self.verbose("softmax, temperature = %.3f" % temp)
        Q = array([self.Q(sensation, action) for action in applicable_actions])
        return softmax(Q, temp)

    def normalized_softmax(self, sensation, applicable_actions):
        """
        Like softmax, except that the Q values are scaled into the
        range [0,1].  May make setting the initial temperature easier
        than with softmax.
        """
        temp = self.temperature()
        self.verbose("softmax, temperature = %.3f" % temp)
        Q = array([self.Q(sensation, action) for action in applicable_actions])
        return softmax(normalize_minmax(Q), temp)

    def temperature(self):
        """
        Using initial_temperature, min_temperature, and
        temperature_half_life, compute the temperature after
        self.total_steps steps.
        """
        Ti = self.initial_temperature
        Tm = self.min_temperature
        decay = log(2) / self.temperature_half_life
        return Tm + (Ti - Tm) * exp(-decay * self.total_steps)

    def epsilon(self):
        """
        Using initial_epsilon, min_epsilon, and epsilon_half_life,
        compute epsilon after self.total_steps steps.
        """
        Ei = self.initial_epsilon
        Em = self.min_epsilon
        decay = log(2) / self.epsilon_half_life
        return Em + (Ei - Em) * exp(-decay * self.total_steps)

    def rho(self, reward):
        """
        Compute the reward since the last step.  If the reward is a
        scalar, it is returned unchanged.  If it is a list, it is
        assumed to be a list of rewards accrued at a constant time
        step, and the discounted sum is returned.
        """
        if isinstance(reward, list):
            result = 0
            for r in reward:
                result = self.gamma * result + r
        else:
            result = reward
        return result

    def applicable(self, action, sensation):
        """
        If the given action has a method called 'applicable', return
        the value of action.applicable(sensation); otherwise return
        True.
        """
        if 'applicable' in dir(action):
            return action.applicable(sensation)
        else:
            return True

    def applicable_actions(self, sensation):
        """
        Return a list of the indices of the actions that are
        applicable to the given sensation.
        """
        return [a for a in range(len(self.actions))
                if self.applicable(self.actions[a], sensation)]
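
# --------------------------------------------------------------------
# Illustrative sketch only (not part of the original toolkit): a
# minimal tabular subclass showing the two methods a TDAgent subclass
# must provide, Q() and update_Q().  The class name TabularTDAgent and
# the attribute _q_table are hypothetical; sensations are assumed to
# be hashable, eligibility traces are ignored (as if lambda_ == 0),
# and array() is assumed to be the numeric array constructor already
# imported at the top of this module.
# --------------------------------------------------------------------
class TabularTDAgent(TDAgent):

    def __init__(self, **args):
        super(TabularTDAgent, self).__init__(**args)
        # Q-values keyed by (sensation, action-index), default 0.0.
        self._q_table = {}

    def Q(self, sensation, action=None):
        if action is None:
            # Return an array of Q-values, one per action in self.actions.
            return array([self._q_table.get((sensation, a), 0.0)
                          for a in range(len(self.actions))])
        return self._q_table.get((sensation, action), 0.0)

    def update_Q(self, sensation, action, delta, on_policy=True):
        # The base class passes the raw TD error
        # delta = rho + gamma*Q(s',a') - Q(s,a); the subclass applies
        # the learning rate.
        key = (sensation, action)
        self._q_table[key] = self._q_table.get(key, 0.0) + self.alpha * delta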
class TDAgent(Agent): """ A generic temporal-difference (TD) agent with discrete actions. To create a new TD agent, override this class and implement the methods .Q(sensation,action=None) and .update_Q(sensation,action,delda,on_policy=True). Parameters: alpha -- The learning rate, default = 0.1 gamma -- The discount factor, default = 1.0 lambda_ -- The eligibility discount factor, default = 0.0. step_method -- The method for doing TD updates: 'sarsa' or 'q_learning'. default = 'sarsa' action_selection -- The action selection method, default 'epsilon_greedy'. To change action selection, set this to the name of the new method, e.g. 'softmax'. initial_epsilon -- The starting epsilon for epsilon_greedy selection. (default=0.1) min_epsilon -- The minimum (final) epsilon. (default = 0.0) epsilon_half_life -- The half-life for epsilon annealing. (default = 1) initial_temperature -- The starting temperature for softmax (Boltzman distribution) selection. (default = 1.0) min_temperature -- The min (final) temperature for softmax selection. (default = 0.01) temperature_half_life -- The temperature half-life for softmax selection (default = 1) actions -- The list of available actions - can be any Python object that is understood as an action by the environment """ alpha = Magnitude(default=0.1) gamma = Magnitude(default=1.0) lambda_ = Magnitude(default=0.0) step_method = Parameter(default="sarsa") action_selection = Parameter(default="epsilon_greedy") # epsilon-greedy selection parameters initial_epsilon = Magnitude(default=0.1) min_epsilon = Magnitude(default=0.0) epsilon_half_life = Number(default=1, bounds=(0, None)) # softmax selection parameters initial_temperature = Number(default=1.0, bounds=(0, None)) min_temperature = Number(default=0.01, bounds=(0, None)) temperature_half_life = Number(default=1, bounds=(0, None)) actions = Parameter(default=[]) prune_eligibility = Magnitude(default=0.001) replacing_traces = Parameter(default=True) history_log = Parameter(default=None) allow_learning = Parameter(default=True) def __init__(self, **args): from plastk.utils import LogFile super(TDAgent, self).__init__(**args) self.nopickle.append('policy_fn') self.policy_fn = getattr(self, self.action_selection) self.total_steps = 0 if isinstance(self.history_log, str): self._history_file = LogFile(self.history_log) elif isinstance(self.history_log, file) or isinstance( self.history_log, LogFile): self._history_file = self.history_log def unpickle(self): """ Called automatically when the agent is unpickled. Sets the action-selection function to its appropriate value. """ super(TDAgent, self).unpickle() self.policy_fn = getattr(self, self.action_selection) def __call__(self, sensation, reward=None): """ Do a step. Calls the function selected in self.step_method and returns the action. """ step_fn = getattr(self, self.step_method + '_step') action_index = step_fn(sensation, reward) if self.history_log: if reward is None: self._history_file.write('start\n') self._history_file.write( ` sensation ` + '\n') self._history_file.write( ` reward ` + '\n') if not is_terminal(sensation): self._history_file.write( ` action_index ` + '\n') return self.actions[action_index] def Q(self, sensation, action=None): """ Return Q(s,a). If action is None, return an array of Q-values for each action in self.actions with the given sensation. You must override this method to implement a TDAgent subclass. """ raise NYI def update_Q(self, sensation, action, delta, on_policy=True): """ Update Q(sensation,action) by delta. 
on_policy indicates whether the step that produced the update was on- or off-policy. Any eligibility trace updates should be done from within this method. You must override this method to implement a TDAgent subclass. """ raise NYI def sarsa_step(self, sensation, reward=None): """ Do a step using the SARSA update method. Selects an action, computes the TD update and calls self.update_Q. Returns the agent's next action. """ if reward == None: return self._start_episode(sensation) rho = self.rho(reward) next_action = self.policy(sensation) if is_terminal(sensation): value = 0 else: value = self.Q(sensation, next_action) last_value = self.Q(self.last_sensation, self.last_action) delta = rho + (self.gamma * value - last_value) self.verbose("controller step = %d, rho = %.2f" % (self.total_steps, rho)) self.verbose(("Q(t-1) = %.5f, Q(t) = %.5f, diff = %.5f," + "delta = %.5f, terminal? = %d") % (last_value, value, value - last_value, delta, is_terminal(sensation))) if self.allow_learning: self.update_Q(self.last_sensation, self.last_action, delta) self.last_sensation = sensation self.last_action = next_action if isinstance(reward, list): self.total_steps += len(reward) else: self.total_steps += 1 return next_action def q_learning_step(self, sensation, reward=None): """ Do a step using Watkins' Q(\lambda) update method. Selects an action, computes the TD update and calls self._q_learning_training. Returns the agent's next action. """ if reward == None: return self._start_episode(sensation) if self.allow_learning: self._q_learning_training(self.last_sensation, self.last_action, reward, sensation) self.last_sensation = sensation self.last_action = self.policy(sensation) if isinstance(reward, list): self.total_steps += len(reward) else: self.total_steps += 1 return self.last_action def _q_learning_training(self, sensation, action, reward, next_sensation): """ Do a single Q-lambda training step given (s,a,r,s'). Can be called from outside the q_learning_step method for off-policy training, experience replay, etc. """ rho = self.rho(reward) last_Q = self.Q(sensation) last_value = last_Q[action] if is_terminal(next_sensation): value = 0 else: value = max(self.Q(next_sensation)) delta = rho + (self.gamma * value - last_value) self.verbose( "r = %.5f, Q(t-1) = %.5f, Q(t) = %.5f, diff = %.5f, delta = %.5f, terminal? = %d" % (rho, last_value, value, value - last_value, delta, is_terminal(next_sensation))) self.update_Q(sensation, action, delta, on_policy=(last_Q[action] == max(last_Q))) if delta: assert (self.Q(sensation, action) - last_value) / delta < 1.0 def _start_episode(self, sensation): """ Start a new episode. Called from self.__call__ when the reward is None. """ self.last_sensation = sensation self.last_action = self.policy(sensation) return self.last_action def policy(self, sensation): """ Given a sensation, return an action. Uses self.action_selection to get a distribution over the agent's actions. Uses self.applicable_actions to prevent selecting inapplicable actions. Returns 0 if is_terminal(sensation). """ if not is_terminal(sensation): actions = self.applicable_actions(sensation) return actions[weighted_sample(self.policy_fn(sensation, actions))] else: # In the terminal state, the action is irrelevant return 0 def epsilon_greedy(self, sensation, applicable_actions): """ Given self.epsilon() and self.Q(), return a distribution over applicable_actions as an array where each element contains the a probability mass for the corresponding action. I.e. 
The action with the highest Q gets p = self.epsilon() and the others get the remainder of the mass, uniformly distributed. """ Q = array([self.Q(sensation, action) for action in applicable_actions]) # simple epsilon-greedy policy # get a vector with a 1 where each max element is, zero elsewhere mask = (Q == mmax(Q)) num_maxes = len(nonzero(mask)) num_others = len(mask) - num_maxes if num_others == 0: return mask e0 = self.epsilon() / num_maxes e1 = self.epsilon() / num_others result = zeros(len(mask)) + 0.0 putmask(result, mask, 1 - e0) putmask(result, mask == 0, e1) return result def softmax(self, sensation, applicable_actions): """ Given self.temperature() and self.Q(), return a Bolzman distribution over applicable_actions as an array where each element contains the a probability mass for the corresponding action. """ temp = self.temperature() self.verbose("softmax, temperature = %.3f" % temp) Q = array([self.Q(sensation, action) for action in applicable_actions]) return softmax(Q, temp) def normalized_softmax(self, sensation, applicable_actions): """ Like softmax, except that the Q values are scaled into the range [0,1]. May make setting the initial temperature easier than with softmax. """ temp = self.temperature() self.verbose("softmax, temperature = %.3f" % temp) Q = array([self.Q(sensation, action) for action in applicable_actions]) return softmax(normalize_minmax(Q), temp) def temperature(self): """ Using initial_temperature, min_temperature, and temperature_half_life, compute the temperature after self.total_steps, steps. """ Ti = self.initial_temperature Tm = self.min_temperature decay = log(2) / self.temperature_half_life return Tm + (Ti - Tm) * exp(-decay * self.total_steps) def epsilon(self): """ Using initial_epsilon, min_epsilon, and epsilon_half_life, compute epsilon after self.total_steps, steps. """ Ei = self.initial_epsilon Em = self.min_epsilon decay = log(2) / self.epsilon_half_life return Em + (Ei - Em) * exp(-decay * self.total_steps) def rho(self, reward): """ Compute the reward since the last step. IF the reward is a scalar, it is returned unchanged. If reward is a list, it is assumed to be a list of rewards accrued at a constant time step, and the discounted sum is returned. """ if isinstance(reward, list): result = 0 for r in reward: result = self.gamma * result + r else: result = reward return result def applicable(self, action, sensation): """ If the given action has a method called 'applicable' return the value of action.applicable(sensation), otherwise return True. """ if 'applicable' in dir(action): return action.applicable(sensation) else: return True def applicable_actions(self, sensation): """ Return a list of the actions that are applicable to the given sensation. """ return [ a for a in range(len(self.actions)) if self.applicable(self.actions[a], sensation) ]
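
# --------------------------------------------------------------------
# Illustrative sketch only: driving an agent through one episode.
# The environment protocol assumed here (env.start() returning the
# initial sensation, env.step(action) returning (sensation, reward))
# is hypothetical and not part of plastk; is_terminal() is the
# function already used above.  The agent is called with reward=None
# to start an episode, then with each (sensation, reward) pair,
# including the terminal sensation and final reward.
# --------------------------------------------------------------------
def run_episode(agent, env):
    sensation = env.start()
    action = agent(sensation)              # reward=None starts the episode
    while not is_terminal(sensation):
        sensation, reward = env.step(action)
        action = agent(sensation, reward)  # learn from the reward, pick next action
    return agent.total_steps

# Example construction with the hypothetical subclass above:
#
#   agent = TabularTDAgent(actions=range(4), alpha=0.1, gamma=0.9,
#                          action_selection='epsilon_greedy',
#                          initial_epsilon=0.5, min_epsilon=0.01,
#                          epsilon_half_life=1000)
#
# With these settings agent.epsilon() anneals as
#   min_epsilon + (initial_epsilon - min_epsilon) * exp(-ln(2)/half_life * total_steps),
# i.e. roughly 0.255 after 1000 steps and 0.13 after 2000 steps.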