Пример #1
0
    def update(self, solved_prob, rewards):
        """Re-weight the solvers from one round of rewards and log solved problems.

        For every index ``i`` whose reward satisfies ``reward >= 0`` the reward
        is divided by ``self.p[i]`` and ``self.w[i]`` is multiplied by
        ``exp(self.gamma * scaled_reward / len(SOLVERS))``.  All other entries
        leave their weight untouched.

        NOTE(review): this looks like an EXP3-style importance-weighted update,
        with negative rewards presumably marking solvers that were not run --
        confirm against the caller.

        Args:
            solved_prob: Problem record; its ``result`` is passed to
                ``is_solved`` and the record is stored when that is truthy.
            rewards: Iterable of per-solver rewards, indexed like ``self.w``
                and ``self.p``.
        """
        solver_count = len(SOLVERS)
        for idx, raw_reward in enumerate(rewards):
            # Negated ">=" (rather than "<") so that anything failing the
            # original test, including NaN, is skipped.
            if not raw_reward >= 0:
                continue
            scaled_reward = raw_reward / self.p[idx]
            growth = np.exp(self.gamma * scaled_reward / solver_count)
            self.w[idx] = self.w[idx] * growth

        # TODO: Implement pruning
        if is_solved(solved_prob.result):
            self.solved.append(solved_prob)
Пример #2
0
 def update(self, solved_prob, rewards):
     """Feed one round of rewards into ``self.dist`` and log solved problems.

     A strictly positive reward is reported to ``self.dist.update`` as ``1``,
     a reward equal to zero as ``0``; any other reward is ignored.

     Args:
         solved_prob: Problem record; its ``result`` is passed to
             ``is_solved`` and the record is stored when that is truthy.
         rewards: Iterable of per-arm rewards; the position in the iterable
             is the index handed to ``self.dist.update``.
     """
     for arm, reward in enumerate(rewards):
         if reward > 0:
             outcome = 1
         elif reward == 0:
             outcome = 0
         else:
             # Neither positive nor zero: nothing to record for this arm.
             continue
         self.dist.update(arm, outcome)

     if not is_solved(solved_prob.result):
         return
     self.solved.append(solved_prob)
Пример #3
0
    def update(self, solved_prob, rewards):
        """Incrementally fit ``self.clf`` on one problem and log solved problems.

        The problem's ``datapoint`` becomes a single-row feature matrix and the
        label is the position of ``solved_prob.solve_method`` among the keys of
        ``SOLVERS``.  On the first call (``self.fitted`` falsy) the full label
        set ``0 .. len(SOLVERS) - 1`` is passed via ``classes``; later calls
        omit it.

        Args:
            solved_prob: Problem record providing ``datapoint``,
                ``solve_method`` and ``result``.
            rewards: Not used by this method.
        """
        features = np.array(solved_prob.datapoint).reshape(1, -1)
        solver_names = list(SOLVERS.keys())
        label = np.array([solver_names.index(solved_prob.solve_method)])

        if not self.fitted:
            # The very first incremental fit has to be told every label up front.
            all_labels = np.unique(list(range(len(SOLVERS))))
            self.clf.partial_fit(features, label, classes=all_labels)
        else:
            self.clf.partial_fit(features, label)
        self.fitted = True

        # TODO: Implement pruning
        if is_solved(solved_prob.result):
            self.solved.append(solved_prob)
Пример #4
0
 def update(self, solved_prob, rewards):
     """Refresh the per-arm running averages and log solved problems.

     For a strictly positive reward the arm's total grows by that reward; for
     a reward equal to zero the total is left alone.  In both cases the arm's
     count is incremented and ``self.values[i]`` is recomputed as
     ``totals[i] / counts[i]``.  Any other reward leaves the arm untouched.

     Args:
         solved_prob: Problem record; its ``result`` is passed to
             ``is_solved`` and the record is stored when that is truthy.
         rewards: Iterable of per-arm rewards, indexed like ``self.totals``,
             ``self.counts`` and ``self.values``.
     """
     for arm, reward in enumerate(rewards):
         if reward > 0:
             self.totals[arm] += reward
         elif reward != 0:
             # Neither positive nor zero: this arm gets no observation.
             continue

         # Shared tail for the positive and zero cases.
         self.counts[arm] += 1
         self.values[arm] = self.totals[arm] / self.counts[arm]

     if not is_solved(solved_prob.result):
         return
     self.solved.append(solved_prob)
Пример #5
0
    def update(self, solved_prob, rewards):
        """Update the shared and per-arm matrices from one round of rewards.

        The datapoint is reshaped into a column vector ``x``.  For every arm
        whose reward satisfies ``reward >= 0`` the update runs in three phases,
        in this order:

        1. ``A_0`` and ``B_0`` gain ``C.T @ inv(A) @ C`` and ``C.T @ inv(A) @ B``
           computed from the arm's matrices *before* they change.
        2. The arm's ``A`` and ``C`` gain ``x @ x.T`` and its ``B`` gains
           ``reward * x``.
        3. ``A_0`` and ``B_0`` gain ``x @ x.T`` and ``reward * x`` minus the same
           two products, now computed from the *updated* arm matrices.

        NOTE(review): this appears to follow the hybrid LinUCB block update
        with the same feature vector used for the shared and per-arm parts --
        confirm against the reference algorithm.

        Args:
            solved_prob: Problem record providing ``datapoint`` (must support
                ``reshape`` and ``len``) and ``result``.
            rewards: Iterable of per-arm rewards, indexed like ``self.As``,
                ``self.Bs`` and ``self.Cs``.
        """
        features = solved_prob.datapoint
        column = features.reshape((len(features), 1))
        outer = column @ column.T

        for arm, reward in enumerate(rewards):
            # Negated ">=" so that anything failing the test is skipped.
            if not reward >= 0:
                continue

            # Phase 1: contribution of the arm's current (old) matrices.
            gain = self.Cs[arm].T @ np.linalg.inv(self.As[arm])
            self.A_0 += gain @ self.Cs[arm]
            self.B_0 += gain @ self.Bs[arm]

            # Phase 2: fold the new observation into the arm's own matrices.
            self.As[arm] = self.As[arm] + outer
            self.Bs[arm] = self.Bs[arm] + reward * column
            self.Cs[arm] = self.Cs[arm] + outer

            # Phase 3: the same products, now from the updated matrices.
            gain = self.Cs[arm].T @ np.linalg.inv(self.As[arm])
            self.A_0 += outer - gain @ self.Cs[arm]
            self.B_0 += reward * column - gain @ self.Bs[arm]

        # TODO: Implement pruning
        if is_solved(solved_prob.result):
            self.solved.append(solved_prob)
Пример #6
0
 def update(self, solved_prob, rewards):
     """Record the problem in ``self.solved`` when ``is_solved`` accepts its result.

     Args:
         solved_prob: Problem record; its ``result`` is passed to ``is_solved``.
         rewards: Not used by this method.
     """
     # TODO: Implement pruning
     if not is_solved(solved_prob.result):
         return
     self.solved.append(solved_prob)