class EmpiricalLearningProgress():
    """Empirical learning-progress (LP) estimator over a task space.

    Keeps a kNN buffer of (competence, task) pairs and estimates the LP of a
    newly attempted task as the absolute competence difference with the
    closest previously-seen task.
    """

    def __init__(self, task_size):
        # Project-local kNN store: presumably x holds the scalar competence
        # and y holds the task vector — TODO confirm against BufferedDataset.
        self.interest_knn = BufferedDataset(1, task_size, buffer_size=2000, lateness=0)

    def get_lp(self, task, competence):
        """Return the absolute learning progress for `task`.

        Parameters:
            task: task parameter vector (queried against the kNN buffer).
            competence: competence achieved on `task`.

        Returns 0 until more than 5 pairs are stored. Always records the new
        (competence, task) pair as a side effect.
        """
        interest = 0
        if len(self.interest_knn) > 5:
            # 1 - Retrieve the closest previously-seen task.
            # (distance value unused; only the neighbor index matters)
            _, idx = self.interest_knn.nn_y(task)
            closest_previous_task_competence = self.interest_knn.get_x(idx[0])
            # 2 - Progress = absolute difference in competence.
            progress = closest_previous_task_competence - competence
            interest = np.abs(progress)
        # Add the new observation to the database.
        self.interest_knn.add_xy(competence, task)
        return interest
class EmpiricalALPComputer():
    """Absolute learning-progress (ALP) computer backed by a kNN buffer.

    Each attempted task is stored with its reward; the ALP of a new attempt
    is the absolute reward difference with the nearest stored task.
    """

    def __init__(self, task_size, max_size=None, buffer_size=500):
        # Project-local kNN store: x is the scalar reward, y the task vector.
        self.alp_knn = BufferedDataset(1, task_size, buffer_size=buffer_size, lateness=0, max_size=max_size)

    def compute_alp(self, task, reward):
        """Return |reward - reward_of_closest_previous_task|, then record (reward, task).

        Yields 0 while fewer than 6 entries are stored.
        """
        alp = 0
        if len(self.alp_knn) > 5:
            # Find the nearest previously-attempted task (distance is ignored).
            _, neighbor_ids = self.alp_knn.nn_y(task)
            # Reward obtained on that neighboring task.
            previous_reward = self.alp_knn.get_x(neighbor_ids[0])
            # ALP is the magnitude of the reward change.
            alp = np.abs(reward - previous_reward)
        # Record the new (reward, task) observation.
        self.alp_knn.add_xy(reward, task)
        return alp
def __init__(self, task_size):
    """Create the interest kNN buffer for a task space of dimension `task_size`."""
    # NOTE(review): this appears to be an orphaned duplicate of the
    # EmpiricalLearningProgress.__init__ above — confirm and deduplicate.
    # BufferedDataset(1, task_size, ...): presumably the first argument is the
    # x-dimension (scalar competence) and the second the task dimension —
    # TODO confirm against the BufferedDataset definition.
    self.interest_knn = BufferedDataset(1, task_size, buffer_size=2000, lateness=0)
def __init__(self, task_size, max_size=None, buffer_size=500):
    """Create the ALP kNN buffer for a task space of dimension `task_size`."""
    # NOTE(review): this appears to be an orphaned duplicate of the
    # EmpiricalALPComputer.__init__ above — confirm and deduplicate.
    # BufferedDataset(1, task_size, ...): presumably x stores a scalar reward
    # and y the task vector; semantics of lateness/max_size not visible here —
    # TODO confirm against the BufferedDataset definition.
    self.alp_knn = BufferedDataset(1, task_size, buffer_size=buffer_size, lateness=0, max_size=max_size)