def update(self, solved_prob, rewards):
    """Rescale solver weights from the observed rewards and log solved problems.

    Each non-negative entry of ``rewards`` is divided by the matching
    ``self.p`` entry, and the matching ``self.w`` entry is multiplied by
    ``exp(self.gamma * scaled_reward / len(SOLVERS))``. Entries that are
    not ``>= 0`` leave their weight untouched.

    Args:
        solved_prob: Problem record; its ``result`` attribute is handed to
            ``is_solved``.
        rewards: Per-solver rewards, indexed like ``self.w`` and ``self.p``.
    """
    n_solvers = len(SOLVERS)
    for idx, raw_reward in enumerate(rewards):
        if raw_reward >= 0:
            # Divide by the solver's probability before the exponential
            # update (presumably importance weighting - confirm).
            scaled = raw_reward / self.p[idx]
            self.w[idx] *= np.exp(self.gamma * scaled / n_solvers)

    # TODO: Implement pruning
    if is_solved(solved_prob.result):
        self.solved.append(solved_prob)
def update(self, solved_prob, rewards):
    """Feed binary outcomes into ``self.dist`` and log solved problems.

    A positive reward is reported to ``self.dist.update`` as ``1``, a zero
    reward as ``0``; any other value (e.g. a negative reward) is skipped.

    Args:
        solved_prob: Problem record; its ``result`` attribute is handed to
            ``is_solved``.
        rewards: Per-arm rewards; the position is passed to
            ``self.dist.update`` as the arm index.
    """
    for arm, r in enumerate(rewards):
        if r > 0:
            outcome = 1
        elif r == 0:
            outcome = 0
        else:
            # Neither positive nor zero: nothing to report for this arm.
            continue
        self.dist.update(arm, outcome)

    if is_solved(solved_prob.result):
        self.solved.append(solved_prob)
def update(self, solved_prob, rewards):
    """Incrementally train ``self.clf`` on one problem and log it if solved.

    The problem's ``datapoint`` becomes a single-row feature matrix and the
    label is the position of ``solved_prob.solve_method`` among the keys of
    ``SOLVERS``. The first call also passes the full label set
    ``0 .. len(SOLVERS) - 1`` via ``classes`` and sets ``self.fitted``.

    Args:
        solved_prob: Problem record providing ``datapoint``,
            ``solve_method`` and ``result``.
        rewards: Unused by this policy.
    """
    features = np.array(solved_prob.datapoint).reshape(1, -1)
    label = list(SOLVERS.keys()).index(solved_prob.solve_method)
    target = np.array([label])

    if not self.fitted:
        # First call: declare every possible label up front.
        all_labels = np.arange(len(SOLVERS))
        self.clf.partial_fit(features, target, classes=all_labels)
        self.fitted = True
    else:
        self.clf.partial_fit(features, target)

    # TODO: Implement pruning
    if is_solved(solved_prob.result):
        self.solved.append(solved_prob)
def update(self, solved_prob, rewards):
    """Update per-arm running averages and log solved problems.

    For every reward that is ``>= 0`` the arm's count is incremented and
    ``self.values`` is refreshed as ``totals / counts``; strictly positive
    rewards are first added to ``self.totals``. Other rewards are skipped.

    Args:
        solved_prob: Problem record; its ``result`` attribute is handed to
            ``is_solved``.
        rewards: Per-arm rewards, indexed like ``self.totals``,
            ``self.counts`` and ``self.values``.
    """
    for arm, r in enumerate(rewards):
        if r >= 0:
            if r > 0:
                # A zero reward counts as an observation but adds nothing
                # to the total.
                self.totals[arm] += r
            self.counts[arm] += 1
            self.values[arm] = self.totals[arm] / self.counts[arm]

    if is_solved(solved_prob.result):
        self.solved.append(solved_prob)
def update(self, solved_prob, rewards):
    """Update shared and per-arm linear statistics, then log solved problems.

    ``solved_prob.datapoint`` is reshaped to a column vector ``x``. For
    every arm whose reward is ``>= 0``:

    1. ``C^T A^-1 C`` and ``C^T A^-1 b`` (from the arm's current ``Cs``,
       ``As``, ``Bs``) are added to the shared ``A_0`` / ``B_0``.
    2. ``As`` and ``Cs`` gain ``x x^T``; ``Bs`` gains ``r * x``.
    3. ``A_0`` gains ``x x^T - C^T A^-1 C`` and ``B_0`` gains
       ``r * x - C^T A^-1 b``, using the refreshed arm matrices.

    NOTE(review): this looks like the hybrid LinUCB bookkeeping with the
    same vector used for shared and arm features - confirm.

    Args:
        solved_prob: Problem record; ``datapoint`` must support
            ``len`` and ``reshape``, ``result`` is handed to ``is_solved``.
        rewards: Per-arm rewards, indexed like ``self.As``.
    """
    n_features = len(solved_prob.datapoint)
    x = solved_prob.datapoint.reshape((n_features, 1))
    outer = x @ x.T

    for arm, r in enumerate(rewards):
        if r >= 0:
            # Contribution of the arm's statistics *before* this observation.
            proj = self.Cs[arm].T @ np.linalg.inv(self.As[arm])
            self.A_0 += proj @ self.Cs[arm]
            self.B_0 += proj @ self.Bs[arm]

            # Fold the observation into the arm's own statistics
            # (rebinding, not in-place, as in the original).
            weighted_x = r * x
            self.As[arm] = self.As[arm] + outer
            self.Bs[arm] = self.Bs[arm] + weighted_x
            self.Cs[arm] = self.Cs[arm] + outer

            # Contribution of the arm's statistics *after* the observation.
            proj = self.Cs[arm].T @ np.linalg.inv(self.As[arm])
            self.A_0 += outer - proj @ self.Cs[arm]
            self.B_0 += weighted_x - proj @ self.Bs[arm]

    # TODO: Implement pruning
    if is_solved(solved_prob.result):
        self.solved.append(solved_prob)
def update(self, solved_prob, rewards):
    """Log the problem in ``self.solved`` when ``is_solved`` accepts its result.

    This policy keeps no learned state, so ``rewards`` is ignored.

    Args:
        solved_prob: Problem record; its ``result`` attribute is handed to
            ``is_solved``.
        rewards: Unused.
    """
    # TODO: Implement pruning
    was_solved = is_solved(solved_prob.result)
    if was_solved:
        self.solved.append(solved_prob)