# Sweep the caregiver parameter `nu` over [0, 1] in 101 steps and, for each of
# N_exp experiments, record an iteration count in Results[expr, nu_index].
# NOTE(review): this chunk was whitespace-mangled (newlines lost); the
# structure below is reconstructed from Python's suite syntax. The chunk is
# also truncated: the `while True` exit (presumably a `break`) and the code
# that appends rewards to `slide` lie beyond the visible source — as shown,
# `slide` never reaches length M. Verify against the full file.
Results = np.zeros((N_exp, 101))
for nu in np.linspace(0, 1, 101):
    print("Experiments with nu={}".format(nu))
    env_conf['nu'] = nu
    for expr in range(N_exp):
        # Draw N_objs random object positions and the matching desired angles.
        objs = sample([i - 20 for i in range(10)], N_objs)
        theta_d = [atan(obj / D) + pi / 2 for obj in objs]
        env_conf['theta_d'] = theta_d
        cg = CareGiver(env_conf)
        arm = Arm(env_conf)
        slide = []
        itt = 1
        while True:
            arm.decided_wanted_obj()
            action = arm.take_next_action()
            R = cg.give_reward(action, arm.wanted_obj)
            arm.update_inner_state(action, R, kernel='gaussian')
            if len(slide) == M:
                # Success criterion: mean reward over the sliding window.
                if np.mean(slide) > threshold:
                    Results[expr, int(nu * 100)] = itt
# NOTE(review): whitespace-mangled chunk reformatted. The leading `self.*`
# statements belong to a method whose `def` (and enclosing class) lie outside
# this view, so their true indentation is unknown — verify against the full
# file before relying on this layout.

# Render one frame, then advance the joints by the fractional remainder of
# the frame count and render the final partial frame.
draw_world(objs, T, world)
cv.imshow(world_im_name, world)
cv.waitKey(int(1000*dt))
world = np.zeros((im_height,im_width,3), np.uint8)
self.q1 += (N_frames-int(N_frames))*omega1*dt
self.q2 += (N_frames-int(N_frames))*omega2*dt
self.draw(world)
draw_world(objs, T, world)
cv.imshow(world_im_name, world)
cv.waitKey(int(1000*dt))

### Setting agents:
env_config["nu"] = 0.3
cg = CareGiver(env_config)
robot = Robot(env_config)
mse = nn.MSELoss()

### Experiment and simulation:
# Phase one:
print("Phase one started")
running_loss = 0.0
for pitt in range(N_pitt):
    # Target grid with a single random cell set to 1.
    # NOTE(review): sample() draws two *distinct* indices, so diagonal cells
    # (i, i) can never be selected — confirm this is intended.
    a = np.zeros((N_anodes, N_anodes))
    a[tuple(np.array(sample(range(N_anodes), 2)))] = 1
# Compare learning kernels: allocate one (N_exp, N_itt - M) result matrix per
# kernel, then run the Dirac-kernel condition (nu = 0, kernel=None).
# NOTE(review): whitespace-mangled chunk reformatted; it is truncated after
# the first write into Results['Dirac'] — the sliding-window bookkeeping
# (pop/append) and the GaussianS/GaussianWS conditions implied by the dict
# keys lie beyond this view.
Results = {'Dirac': np.zeros((N_exp, N_itt - M)),
           'GaussianS': np.zeros((N_exp, N_itt - M)),
           'GaussianWS': np.zeros((N_exp, N_itt - M))}
for expr in range(N_exp):
    # Random object positions and their desired arm angles.
    objs = sample([i - 20 for i in range(10)], N_objs)
    theta_d = [atan(obj / D) + pi / 2 for obj in objs]
    env_config['theta_d'] = theta_d
    # Dirac kernel :
    print("Experiment - %s - %i" % ("Dirac kernel", expr))
    env_config['nu'] = 0
    cg = CareGiver(env_config)
    arm = Arm(env_config)
    slide = []
    for itt in range(N_itt):
        cg.T = 3 * pi / N_anodes
        arm.decided_wanted_obj()
        action = arm.take_next_action()
        R, _ = cg.give_reward(action, arm.wanted_obj)
        arm.update_inner_state(action, R, kernel=None)
        if len(slide) == M:
            # Record the window-mean reward once the window is full.
            Results['Dirac'][expr, itt - M] = np.mean(slide)
# NOTE(review): whitespace-mangled chunk reformatted. The leading `self.*`
# statements belong to a method whose `def` (and enclosing class) lie outside
# this view — verify their indentation against the full file. The chunk is
# truncated mid-loop (the sliding-window update after update_inner_state is
# not visible).

# Render the final partial frame, overlay the status text, and append the
# frame to the video writer.
world = np.zeros((im_height, im_width, 3), np.uint8)
self.q1 += (N_frames - int(N_frames)) * omega1 * dt
self.q2 += (N_frames - int(N_frames)) * omega2 * dt
self.draw(world)
draw_world(objs, T, world)
# NOTE(review): cv.LINE_AA is passed where putText expects a fontFace
# (font id), and no lineType is given — confirm the intended argument order.
cv.putText(world, data, (25, im_height - 25), cv.LINE_AA, 1, (0, 0, 255), 2)
cv.imshow(world_im_name, world)
cv.waitKey(int(1000 * dt))
out.write(world)

### Setting agents:
cg = CareGiver(env_config)
robot = Robot(env_config)

### Experiment and simulation:
slide = []
R_moy = "None"
for itt in range(N_itt):
    cg.T = 0.1
    robot.decided_wanted_obj()
    action = robot.take_next_action()
    R, d = cg.give_reward(action, robot.wanted_obj)
    robot.update_inner_state(action, R, kernel="gaussian")
# NOTE(review): whitespace-mangled chunk left byte-identical — it begins
# mid-statement (the tail of a print() call and an `else:` whose `if` lies
# outside this view), so no safe reformatting is possible here.
# It appears to log the pointing-learning loss relative to loss_0, refresh
# the retina-to-action mapper, then run N_itt caregiver/eaa interaction
# steps, recording window-mean rewards into Results['GaussianS'] and moving
# objs[0] by dx each step ("with scaffolding") — verify against the full
# file before editing.
pitt, N_pitt, loss_0)) else: print('Pointing learning loss {}/{}: {}%'.format( pitt, N_pitt, 100 * (running_loss / batch_size - loss_0) / loss_0)) running_loss = 0.0 eaa.update_retina_to_action_mapper() slide = [] for itt in range(N_itt): theta_d = [atan((obj - des) / D) + pi / 2 for obj in objs] env_config["theta_d"] = theta_d cg = CareGiver(env_config) abstract_retina = np.array([[obj, objs[obj]] for obj in range(N_objs)]) eaa.decided_wanted_obj() action = eaa.take_next_action() R, _ = cg.give_reward(action, eaa.wanted_obj) eaa.update_inner_state(action, R, abstract_retina) if len(slide) == M: Results['GaussianS'][expr, itt - M] = np.mean(slide) slide.pop(0) slide.append(R) objs[0] += dx # Without scaffolding :
# NOTE(review): whitespace-mangled chunk left byte-identical — it ends on a
# dangling `if len(slide) == M:` whose suite is truncated past this view, so
# reformatting would require inventing the missing body.
# It appears to sweep the object count N_objs from 1 to N_objs_max and, per
# experiment, run the Dirac-kernel condition (nu = 0, kernel=None) until some
# criterion beyond this view is met. Note it uses N_nodes where the similar
# block elsewhere uses N_anodes — confirm which name is correct.
for N_objs in range(1, N_objs_max + 1): print("Experiments with {} objects".format(N_objs)) env_conf['N_objs'] = N_objs for expr in range(N_exp): # Dirac kernel : objs = sample([i - 2 * N_objs_max for i in range(N_objs_max)], N_objs) theta_d = [atan(obj / D) + pi / 2 for obj in objs] env_conf['theta_d'] = theta_d env_conf['nu'] = 0 cg = CareGiver(env_conf) arm = Arm(env_conf) slide = [] itt = 1 while True: cg.T = 3 * pi / N_nodes arm.decided_wanted_obj() action = arm.take_next_action() R = cg.give_reward(action, arm.wanted_obj) arm.update_inner_state(action, R, kernel=None) if len(slide) == M:
# NOTE(review): whitespace-mangled chunk left byte-identical — it contains an
# `else:` whose matching `if` lies outside this view and ends with an
# unbalanced `'''` (the start of a quoted-out region continuing past this
# chunk), so no safe reformatting is possible here.
# It appears to log pointing-learning loss (absolute on the first pass,
# percentage relative to loss_0 afterwards), then run N_itt caregiver/eaa
# steps recording window-mean rewards into Results['GaussianS']. Unlike the
# similar block elsewhere, give_reward's result is bound as a single value
# `R` rather than unpacked `R, _` — confirm which return arity is correct.
loss_0 = loss.item() print('Pointing learning loss {}/{}: {}'.format( pitt, N_pitt, loss.item())) else: print('Pointing learning loss {}/{}: {}%'.format( pitt, N_pitt, 100 * (loss.item() - loss_0) / loss_0)) eaa.update_retina_to_action_mapper() slide = [] for itt in range(N_itt): theta_d = [atan((obj - des) / D) + pi / 2 for obj in objs] env_config["theta_d"] = theta_d cg = CareGiver(env_config) abstract_retina = np.array([[obj, objs[obj]] for obj in range(N_objs)]) eaa.decided_wanted_obj() action = eaa.take_next_action() R = cg.give_reward(action, eaa.wanted_obj) eaa.update_inner_state(action, R, abstract_retina) if len(slide) == M: Results['GaussianS'][expr, itt - M] = np.mean(slide) slide.pop(0) slide.append(R) objs[0] += dx ''' # Without scaffolding :