Exemplo n.º 1
0
    def add_base_feature(self, center, dim, Q):
        """
        Register a new one-dimensional kernelized feature.

        The feature is appended to ``self.features``, pushed onto
        ``self.sorted_ids`` with priority ``-1``, and paired as a candidate
        with every previously known feature that is not already defined on
        ``dim``. Finally the weights are grown through ``add_new_features``.

        Args:
            center: Passed through as the ``center`` of the new
                ``KernelizedFeature``.
            dim: The single dimension of the feature; it is wrapped into a
                one-element list before being handed to the feature.
            Q: Forwarded to ``add_new_features`` as the new element, but only
                when ``self.normalization`` is truthy; ignored otherwise.

        Returns:
            The index assigned to the new feature.
        """
        new_index = self.features_num
        feature = KernelizedFeature(
            center=center,
            dim=[dim],
            kernel_args=self.kernel_args,
            kernel=self.kernel,
            index=new_index,
        )
        self.features.append(feature)

        self.base_id_sets.add(feature.base_ids)
        self.sorted_ids.push(-1, new_index)
        self.logger.debug("Added Feature {} {}".format(new_index, feature))

        # Every older feature that does not already use `dim` becomes a
        # candidate partner for the new feature.
        pairings = {}
        for other in range(new_index):
            if dim in self.features[other].dim:
                continue
            pairings[(other, new_index)] = Candidate(other, new_index)
        self.candidates.update(pairings)

        # Record the combined base ids of each new pairing.
        for other, _ in pairings:
            self.base_id_sets.add(
                feature.base_ids | self.features[other].base_ids)
        self.features_num += 1

        # Grow the parameters by one feature; Q is only supplied when
        # normalization is switched on.
        extra_args = (Q,) if self.normalization else ()
        self.weight = add_new_features(self.weight, *extra_args)
        return new_index
Exemplo n.º 2
0
    def add_refined_feature(self, index1, index2, Q):
        """
        Add the combination of two existing features to the representation.

        The new feature covers the union of the parents' dimensions and base
        ids. Its center holds, per dimension, the parents' center values
        averaged over the parents that are defined on that dimension.
        The pair ``(index1, index2)`` is removed from ``self.candidates`` and
        new candidates pairing the refined feature with older features are
        registered.

        Args:
            index1: Index of the first parent in ``self.features``.
            index2: Index of the second parent in ``self.features``.
            Q: Forwarded to ``add_new_features`` as the new element, but only
                when ``self.normalization`` is truthy; ignored otherwise.

        Returns:
            The index assigned to the refined feature.

        Raises:
            KeyError: If ``(index1, index2)`` is not a key of
                ``self.candidates``.
        """
        parent_a = self.features[index1]
        parent_b = self.features[index2]
        new_index = self.features_num

        # Accumulate the parents' centers on their own dimensions ...
        merged_center = np.zeros_like(parent_a.center)
        merged_center[parent_a.dim] += parent_a.center[parent_a.dim]
        merged_center[parent_b.dim] += parent_b.center[parent_b.dim]
        # ... and divide by the number of parents that contributed to each.
        contributors = np.zeros_like(parent_a.center)
        contributors[parent_a.dim] += 1
        contributors[parent_b.dim] += 1
        # Dimensions used by neither parent keep a divisor of one.
        contributors[contributors == 0] = 1.0
        merged_center /= contributors

        merged_dim = sorted(frozenset(parent_a.dim) | frozenset(parent_b.dim))
        merged_ids = parent_a.base_ids | parent_b.base_ids

        feature = KernelizedFeature(
            center=merged_center,
            dim=merged_dim,
            kernel_args=self.kernel_args,
            kernel=self.kernel,
            index=new_index,
            base_ids=merged_ids,
        )
        self.features.append(feature)
        # The priority is the negated number of base ids.
        self.sorted_ids.push(-len(feature.base_ids), new_index)
        self.base_id_sets.add(feature.base_ids)
        del self.candidates[(index1, index2)]

        # Collect new candidates first: an older feature qualifies when the
        # combined base-id set is not known yet and it shares no dimension
        # with the refined feature.
        merged_dim_set = frozenset(merged_dim)
        pairings = {}
        for other in range(new_index):
            partner = self.features[other]
            if (partner.base_ids | merged_ids) in self.base_id_sets:
                continue
            if frozenset(partner.dim) & merged_dim_set:
                continue
            pairings[(other, new_index)] = Candidate(other, new_index)

        # Only after the selection is complete are the combined base-id sets
        # registered, so the selection above is not influenced by them.
        for other, _ in pairings:
            self.base_id_sets.add(merged_ids | self.features[other].base_ids)
        self.candidates.update(pairings)

        self.logger.debug(
            "Added refined feature {} {}".format(new_index, feature))
        self.logger.debug("{} candidates".format(len(self.candidates)))
        self.features_num += 1

        # Grow the parameters by one feature; Q is only supplied when
        # normalization is switched on.
        extra_args = (Q,) if self.normalization else ()
        self.weight = add_new_features(self.weight, *extra_args)

        return new_index
Exemplo n.º 3
0
 def _expand_vectors(self, num_expansions):
     """
     Resize the GQ weights and the eligibility traces after new features
     were added.

     ``self.gqweight`` is always extended with a zero block of shape
     ``(num_actions, num_expansions)``. The two eligibility traces are only
     extended when ``self.lambda_`` is truthy.

     Args:
         num_expansions: Number of zero columns to create for the padding.
     """
     action_padding = np.zeros(
         (self.representation.num_actions, num_expansions)
     )
     self.gqweight = add_new_features(self.gqweight, action_padding)
     if not self.lambda_:
         return

     # Pad the eligibility traces with zeros for the new features.
     # NOTE(review): unlike the other calls in this method, this one passes
     # three positional arguments (the action count in the middle) --
     # confirm this matches the signature of add_new_features.
     self.eligibility_trace = add_new_features(
         self.eligibility_trace,
         self.representation.num_actions,
         action_padding,
     )
     state_padding = np.zeros((1, num_expansions))
     self.eligibility_trace_s = add_new_features(
         self.eligibility_trace_s, state_padding
     )
Exemplo n.º 4
0
    def learn(self, s, p_actions, a, r, ns, np_actions, na, terminal):
        """
        Update the representation's weights from one observed transition.

        Args:
            s: State in which the action was taken.
            p_actions: Not used by this method.
            a: Action taken in ``s``.
            r: Reward added to the TD error.
            ns: Successor state.
            np_actions: Handed to ``self._future_action`` together with
                ``ns`` to pick the action used for bootstrapping.
            na: Next action; replaced by the result of
                ``self._future_action``.
            terminal: Whether ``ns`` is terminal. If truthy,
                ``self.episode_terminated()`` is called at the end.
        """
        rep = self.representation
        # The previous state can never be terminal, otherwise the episode
        # would already have ended.
        prev_terminal = False

        rep.pre_discover(s, prev_terminal, a, ns, terminal)
        gamma = self.discount_factor
        phi_s = rep.phi(s, prev_terminal)
        phi = rep.phi_sa(s, prev_terminal, a, phi_s)
        phi_next_s = rep.phi(ns, terminal)
        # This is where SARSA and Q-Learning differ.
        na = self._future_action(ns, terminal, np_actions, phi_next_s, na)
        phi_next = rep.phi_sa(ns, terminal, na, phi_next_s)
        # Number of non-zero entries in the state features.
        active_count = count_nonzero(phi_s)

        # Eligibility traces
        if self.lambda_ > 0:
            num_actions = rep.num_actions
            missing = (
                phi.shape[0] - self.eligibility_trace.shape[0]
            ) // num_actions
            if missing > 0:
                # Features were added since the last step: pad the trace
                # with zeros, one block of new entries per action.
                per_action = self.eligibility_trace.reshape(num_actions, -1)
                zero_pad = np.zeros((num_actions, missing))
                padded = add_new_features(per_action, zero_pad)
                self.eligibility_trace = padded.flatten()

            self.eligibility_trace *= gamma * self.lambda_
            self.eligibility_trace += phi

            # Cap the trace at one.
            self.eligibility_trace[self.eligibility_trace > 1] = 1
        else:
            self.eligibility_trace = phi

        feature_delta = gamma * phi_next - phi
        td_error = r + np.dot(feature_delta, rep.weight_vec)
        if active_count > 0:
            self.updateLearnRate(
                phi, phi_next, self.eligibility_trace, gamma, active_count,
                terminal,
            )
            backup = rep.weight.copy()
            rep.weight_vec += (
                self.learn_rate * rep.feature_learning_rate() *
                td_error * self.eligibility_trace)
            diverged = not np.all(np.isfinite(rep.weight_vec))
            if diverged:
                # Roll back to the weights saved before this update.
                rep.weight = backup
                import warnings

                warnings.warn(
                    "WARNING: TD-Learning diverged, weight_vec reached infinity!"
                )

        # Let the representation discover features; its return value is not
        # needed here.
        rep.post_discover(s, prev_terminal, a, td_error, phi_s)

        if terminal:
            # The successor state ends the episode.
            self.episode_terminated()
Exemplo n.º 5
0
Arquivo: ifdd.py Projeto: kngwyu/rlpy3
 def updateWeight(self, p1_index, p2_index):
     """
     Add a weight for a newly added feature, for all actions.

     When ``self.sparsify`` is truthy the new weight starts as the sum of the
     parents' weights (columns ``p1_index`` and ``p2_index`` of
     ``self.weight``). Otherwise no initial value is supplied and
     ``add_new_features`` applies its default, which is expected to be zero.
     """
     initial = None
     if self.sparsify:
         # NOTE(review): for scalar indices this sum has one dimension fewer
         # than self.weight -- confirm add_new_features accepts that shape.
         initial = self.weight[:, p1_index] + self.weight[:, p2_index]
     self.weight = add_new_features(self.weight, initial)
     # The phi function has changed, so the hashed phi must not be reused.
     self.hashed_s = None
Exemplo n.º 6
0
 def add_new_weight(self):
     """
     Extend the weights of all actions by one entry for a newly added
     feature.

     No initial value is passed, so ``add_new_features`` applies its default,
     which is expected to be zero.
     """
     grown = add_new_features(self.weight)
     self.weight = grown