def evaluation(args, r, high_level_planning):
    """Velocity-tracking evaluation: command a sequence of target velocities and
    compare the model's predicted CoM velocity against the measured one."""
    # rows: target vx, target vy, predicted vx, predicted vy, measured vx, measured vy
    velocity_tracking = np.empty(
        (6, args.num_iters, args.num_latent_action_per_iteration))
    save_dir = utils.make_dir(
        os.path.join(args.save_dir + '/trial_%s' % str(args.seed))) if args.save else None

    exp_variables = {
        'finish_exp': [0],
        'not_finish_one_iter': [1],
        'finish_one_step': [0],
        'update_z_action': [0],
    }
    ru.set_variables(r, exp_variables)

    for i in range(0, args.num_iters):
        ru.wait_for_key(r, 'not_finish_one_iter', change=False)
        print('Iteration: ', i + 1)
        input('Press any key after initialized robot')
        ru.wait_for_key(r, 'finish_one_step')
        state, exp_variables = ru.get_state(r)

        for j in range(args.num_latent_action_per_iteration):
            pre_com_state = state
            # switch to the next target velocity every 5 latent actions
            # (target_velocity_test is expected to be defined at module level)
            if not j % 5:
                target = target_velocity_test[int((j + 1) / 5)] / 1.5 * 2

            # take current state, plan the next z_action and send it to Daisy
            latent_action = high_level_planning.plan_latent_action(
                pre_com_state, target)
            exp_variables['z_action'] = latent_action.tolist()
            exp_variables['update_z_action'] = [1]
            ru.set_variables(r, exp_variables)

            # wait until the low-level controller finishes one footstep
            ru.wait_for_key(r, 'finish_one_step')
            state, exp_variables = ru.get_state(r)
            post_com_state = state

            # if utils.check_data_useful(post_com_state):
            high_level_obs, high_level_delta_obs = utils.HL_obs(
                pre_com_state), utils.HL_delta_obs(pre_com_state, post_com_state)
            predict_state = high_level_planning.model_predict(
                high_level_obs, latent_action)

            velocity_tracking[0][i][j] = target[0]
            velocity_tracking[1][i][j] = target[1]
            velocity_tracking[2][i][j] = predict_state[4]
            velocity_tracking[3][i][j] = predict_state[5]
            velocity_tracking[4][i][j] = high_level_delta_obs[4]
            velocity_tracking[5][i][j] = high_level_delta_obs[5]

        np.save(
            save_dir + '/' + args.high_level_policy_type +
            '_velocity_tracking_test.npy', velocity_tracking)

        exp_variables['not_finish_one_iter'] = [0]
        ru.set_variables(r, exp_variables)

        exp_variables = ru.get_variables(r)
        velocity_record = exp_variables['record_vel']
        np.save(
            save_dir + '/' + args.high_level_policy_type + '_velocity_record.npy',
            np.array(velocity_record))

    # experiment ends
    exp_variables['finish_exp'] = [1]
    ru.set_variables(r, exp_variables)
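# --- Offline inspection sketch (not called anywhere above) ---------------------
# The *_velocity_tracking_test.npy file written by evaluation() has shape
# (6, num_iters, num_steps): rows 0-1 target vx/vy, rows 2-3 model-predicted
# vx/vy, rows 4-5 measured vx/vy. Below is a minimal plotting helper assuming
# that layout; the default path and the 'NN' policy-type prefix are only
# placeholder examples.
def plot_velocity_tracking(path='trial_0/NN_velocity_tracking_test.npy', iteration=0):
    import matplotlib.pyplot as plt
    data = np.load(path)
    fig, axes = plt.subplots(2, 1, sharex=True)
    for ax, (tgt, pred, meas), name in zip(axes, [(0, 2, 4), (1, 3, 5)], ['v_x', 'v_y']):
        ax.plot(data[tgt, iteration], label='target ' + name)
        ax.plot(data[pred, iteration], label='model prediction')
        ax.plot(data[meas, iteration], label='measured')
        ax.set_ylabel(name + ' [m/s]')
        ax.legend()
    axes[-1].set_xlabel('latent-action step')
    plt.show()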
def evaluation(args, r, high_level_planning):
    """Cost evaluation: track a fixed target velocity for a set number of latent
    actions per iteration and record the accumulated cost."""
    # reassigned per iteration below; kept for the commented-out tracking code
    position_tracking = [[] for _ in range(16)]
    save_dir = utils.make_dir(
        os.path.join(args.save_dir + '/trial_%s' % str(args.seed))) if args.save else None

    exp_variables = {
        'finish_exp': [0],
        'not_finish_one_iter': [1],
        'finish_one_step': [0],
        'update_z_action': [0],
    }
    ru.set_variables(r, exp_variables)

    cost_record = []
    for iter in range(0, args.num_iters):
        position_tracking = [[], [], [], []]
        ru.wait_for_key(r, 'not_finish_one_iter', change=False)
        print('Iteration: ', iter + 1)
        input('Press any key after initialized robot')
        ru.wait_for_key(r, 'finish_one_step')
        state, exp_variables = ru.get_state(r)

        # target_velocity and cost_index are expected to be defined at module level
        target = target_velocity
        # position_tracking[2*iter].append(state['base_pos_x'][0])
        # position_tracking[2*iter+1].append(state['base_pos_y'][0])
        j = 0
        total_cost = 0
        # while True:
        for i in range(args.num_latent_action_per_iteration):
            pre_com_state = state

            # take current state, plan the next z_action and send it to Daisy
            latent_action = high_level_planning.plan_latent_action(
                pre_com_state, target)
            exp_variables['z_action'] = latent_action.tolist()
            exp_variables['update_z_action'] = [1]
            ru.set_variables(r, exp_variables)

            # wait until the low-level controller finishes one footstep
            ru.wait_for_key(r, 'finish_one_step')
            state, exp_variables = ru.get_state(r)
            post_com_state = state

            cost = utils.easy_cost(target, pre_com_state, post_com_state)
            total_cost += cost

            # if utils.check_data_useful(post_com_state):
            high_level_obs, high_level_delta_obs = utils.HL_obs(
                pre_com_state), utils.HL_delta_obs(pre_com_state, post_com_state)
            predict_state = high_level_planning.model_predict(
                high_level_obs, latent_action)

            # position_tracking[2*iter].append(state['base_pos_x'][0])
            # position_tracking[2*iter+1].append(state['base_pos_y'][0])
            print([state['base_pos_x'][0], state['base_pos_y'][0]],
                  high_level_delta_obs[0:2], predict_state[0:2])
            j += 1

            # if np.linalg.norm(target[0:2] - np.array([state['base_pos_x'][0], state['base_pos_y'][0]])) < 0.25:
            #     print("Reach One Goal !!!!")
            #     np.save(save_dir + '/' + args.high_level_policy_type + '_multi_tgt_test_' + str(iter) + '.npy',
            #             np.array(position_tracking))
            #     break
            # if j > 30:
            #     print('Did not reach goal')
            #     break

        cost_record.append(total_cost)
        print(cost_record)

        exp_variables['not_finish_one_iter'] = [0]
        ru.set_variables(r, exp_variables)

        input('wait for pos')
        exp_variables = ru.get_variables(r)
        # state_record = exp_variables['record_pos']
        # velocity_record = exp_variables['record_vel']
        # np.save(save_dir + '/' + args.high_level_policy_type + '_velocity_record_' + str(cost_index) + '_' + str(iter), np.array(velocity_record))
        # np.save(save_dir + '/' + args.high_level_policy_type + '_state_record_' + str(cost_index) + '_' + str(iter), np.array(state_record))
        np.save(
            save_dir + '/' + args.high_level_policy_type + '_' +
            args.low_level_policy_type + '_different_cost_test' +
            str(cost_index) + '.npy', np.array(cost_record))
        # np.save(save_dir + '/' + args.high_level_policy_type + '_' + args.low_level_policy_type + str(iter) + '_com.npy', np.array(position_tracking))

    # experiment ends
    exp_variables['finish_exp'] = [1]
    ru.set_variables(r, exp_variables)
def collect_data_client(args, r, high_level_planning, HL_replay_buffer):
    """Data-collection client: send perturbed latent actions to the robot and
    store (obs, latent_action, delta_obs) transitions in the replay buffer."""
    exp_variables = {
        'finish_exp': [0],
        'not_finish_one_iter': [1],
        'finish_one_step': [0],
        'update_z_action': [0],
    }
    ru.set_variables(r, exp_variables)

    # resumes collection from iteration 17; latent_all is expected at module level
    for i in range(17, args.num_iters):
        ru.wait_for_key(r, 'not_finish_one_iter', change=False)
        print('Iteration: ', i + 1)
        input('Press any key after initialized robot')
        ru.wait_for_key(r, 'finish_one_step')
        state, exp_variables = ru.get_state(r)

        for k in range(args.num_latent_action_per_iteration):
            pre_com_state = state
            q = k % 5
            if q == 0:
                target = np.clip(0.2 * np.random.randn(3), -0.25, 0.25)
                latent_action = np.array(
                    latent_all[i]) + 0.1 * np.random.randn(args.z_dim)
                # latent_action = high_level_planning.sample_latent_action(target)
            if q == 3 or q == 4:
                target = np.zeros(3)
                latent_action = np.array(
                    [0.2068, 0.1836]) + 0.1 * np.random.randn(args.z_dim)

            # take current state, plan the next z_action and send it to Daisy
            t_start = datetime.datetime.now()
            # if args.test:
            #     latent_action = high_level_planning.plan_latent_action(pre_com_state, target)
            # else:
            #     latent_action = high_level_planning.sample_latent_action(target)
            t_end = datetime.datetime.now()

            exp_variables['z_action'] = latent_action.tolist()
            exp_variables['update_z_action'] = [1]
            ru.set_variables(r, exp_variables)

            # wait until the low-level controller finishes one footstep
            ru.wait_for_key(r, 'finish_one_step')
            state, exp_variables = ru.get_state(r)
            post_com_state = state

            if utils.check_data_useful(post_com_state):
                high_level_obs, high_level_delta_obs = utils.HL_obs(
                    pre_com_state), utils.HL_delta_obs(pre_com_state, post_com_state)
                HL_replay_buffer.add(high_level_obs, latent_action, 0,
                                     high_level_delta_obs, 1)

            if utils.check_robot_dead(post_com_state):
                break

        exp_variables['not_finish_one_iter'] = [0]
        ru.set_variables(r, exp_variables)

        # save the replay buffer after every iteration
        if (i + 1) % 1 == 0:
            model_save_dir = utils.make_dir(
                os.path.join(args.save_dir + '/trial_%s' % str(args.seed))) if args.save else None
            HL_replay_buffer.save_buffer(model_save_dir)

    # experiment ends
    exp_variables['finish_exp'] = [1]
    ru.set_variables(r, exp_variables)
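# --- Handshake sketch ----------------------------------------------------------
# The client functions above and the robot-side loop below synchronize through a
# few shared flags ('finish_one_step', 'update_z_action', 'not_finish_one_iter').
# The helpers below sketch one plausible way the ru.* calls could be implemented
# over Redis, assuming the variables are stored as a single JSON blob under one
# key; the real redis_utils module may use different key names, encoding, or
# polling behavior. Illustration only -- not used by the functions in this file.
def _example_set_variables(r, exp_variables, key='exp_variables'):
    """Sketch: store all experiment variables as one JSON blob (assumed key name)."""
    import json
    r.set(key, json.dumps(exp_variables))

def _example_get_variables(r, key='exp_variables'):
    import json
    return json.loads(r.get(key))

def _example_wait_for_key(r, key, change=True):
    """Sketch: poll until exp_variables[key][0] is truthy; optionally clear it."""
    import time
    while True:
        exp_variables = _example_get_variables(r)
        if exp_variables[key][0]:
            if change:
                exp_variables[key] = [0]
                _example_set_variables(r, exp_variables)
            return exp_variables
        time.sleep(0.01)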
def run_LLTG_IK(env, args, r):
    """Robot-side loop: run the low-level trajectory generator (IK) and exchange
    state / latent actions with the client through the shared variable store."""
    # initialize robot
    a = input('Select way to initialize the robot')
    if str(a) == '1':
        # start from ground
        state = motion_library.demo_standing(env, shoulder=SHOULDER, elbow=ELBOW)
    else:
        state = motion_library.hold_position(env, shoulder=SHOULDER, elbow=ELBOW)
    print('Robot initialized, press key on client!')

    velocity_stack = [[], []]
    velocity_record = [[], []]
    postion_record = [[], [], []]

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    low_level_TG = LLTG.low_level_TG(
        device=device,
        z_dim=args.z_dim,
        a_dim=args.a_dim,
        num_timestep_per_footstep=args.num_timestep_per_footstep,
        batch_size=args.batch_size,
        low_level_policy_type=args.low_level_policy_type,
        update_low_level_policy=args.update_low_level_policy,
        update_low_level_policy_lr=args.update_low_level_policy_lr,
        init_state=state,
    )
    des_height = state['base_pos_z'][0]

    exp_variables = ru.get_variables(r)
    exp_variables['finish_one_step'] = [1]
    velocity_stack[0].append(0)
    velocity_stack[1].append(0)
    exp_variables = ru.set_state(r, state, exp_variables)
    state = motion_library.hold_position(env, shoulder=SHOULDER, elbow=ELBOW)
    print('Start operation!')

    while True:
        # update swing/stance leg
        low_level_TG.policy.update_swing_stance()

        # do IK
        for step in range(1, args.num_timestep_per_footstep + 1):
            t_start = datetime.datetime.now()

            # check if the client pushed a new latent action
            exp_variables = ru.get_variables(r)
            if exp_variables['update_z_action'][0]:
                z_action = np.array(exp_variables['z_action'])
                low_level_TG.policy.update_latent_action_params(state, z_action)
                exp_variables['update_z_action'] = [0]
                ru.set_variables(r, exp_variables)

            action = low_level_TG.get_action(state, step)
            state = env.step(action)

            velocity_stack[0].append(state['base_velocity'][0])
            velocity_stack[1].append(state['base_velocity'][1])
            velocity_record[0].append(state['base_velocity'][0])
            velocity_record[1].append(state['base_velocity'][1])
            postion_record[0].append(state['base_pos_x'][0])
            postion_record[1].append(state['base_pos_y'][0])
            postion_record[2].append(state['base_ori_euler'][2])
            # keep only the last STACK_LENGTH velocity samples
            if len(velocity_stack[0]) > STACK_LENGTH:
                velocity_stack[0].pop(0)
                velocity_stack[1].pop(0)

            # keep the loop at the desired control frequency
            t_end = datetime.datetime.now()
            t_diff = (t_end - t_start).total_seconds()
            time.sleep(max(0, 1.0 / args.control_frequency - t_diff))

        # finish one step and report the smoothed velocity to the high level
        exp_variables['finish_one_step'] = [1]
        state['base_velocity'][0] = mean(velocity_stack[0])
        state['base_velocity'][1] = mean(velocity_stack[1])
        ru.set_state(r, state, exp_variables)

        exp_variables = ru.get_variables(r)
        if not exp_variables['not_finish_one_iter'][0]:
            # reset the flag for the next iteration and hand back the recordings
            exp_variables['not_finish_one_iter'] = [1]
            exp_variables['record_vel'] = velocity_record
            exp_variables['record_pos'] = postion_record
            ru.set_state(r, state, exp_variables)
            break
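# --- Velocity smoothing note ---------------------------------------------------
# run_LLTG_IK reports the mean of the last STACK_LENGTH velocity samples to the
# high level (velocity_stack plus pop(0)). The same windowed mean can be written
# with collections.deque(maxlen=...); a small equivalent sketch for illustration
# (the window size here is an assumed example, not the module's STACK_LENGTH):
def _example_smoothed_velocity(samples, window=10):
    """Return (mean vx, mean vy) over the last `window` (vx, vy) samples."""
    from collections import deque
    from statistics import mean
    vx_window, vy_window = deque(maxlen=window), deque(maxlen=window)
    for vx, vy in samples:
        vx_window.append(vx)
        vy_window.append(vy)
    return mean(vx_window), mean(vy_window)

# usage sketch: _example_smoothed_velocity([(0.10, 0.0), (0.14, 0.02), (0.08, -0.01)])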
def evaluation(args, r, high_level_planning):
    """Waypoint-following evaluation: plan toward a sequence of square/circle
    waypoints and record the CoM trajectory and accumulated cost."""
    save_dir = utils.make_dir(
        os.path.join(args.save_dir + '/trial_%s' % str(args.seed))) if args.save else None

    exp_variables = {
        'finish_exp': [0],
        'not_finish_one_iter': [1],
        'finish_one_step': [0],
        'update_z_action': [0],
    }
    ru.set_variables(r, exp_variables)

    # square_circle_test and test_index are expected to be defined at module level
    for iter in range(args.num_iters):
        position_tracking = [[], [], []]  # CoM x, y, yaw track for this iteration
        ru.wait_for_key(r, 'not_finish_one_iter', change=False)
        print('Iteration: ', iter + 1)
        input('Press any key after initialized robot')
        ru.wait_for_key(r, 'finish_one_step')
        state, exp_variables = ru.get_state(r)

        position_tracking[0].append(state['base_pos_x'][0])
        position_tracking[1].append(state['base_pos_y'][0])
        position_tracking[2].append(state['base_ori_euler'][2])

        j = 0
        target_index = 0
        total_cost = 0
        while True:
            target = square_circle_test[target_index]
            pre_com_state = state

            # take current state, plan the next z_action and send it to Daisy
            latent_action = high_level_planning.plan_latent_action(
                pre_com_state, target)
            exp_variables['z_action'] = latent_action.tolist()
            exp_variables['update_z_action'] = [1]
            ru.set_variables(r, exp_variables)

            # wait until the low-level controller finishes one footstep
            ru.wait_for_key(r, 'finish_one_step')
            state, exp_variables = ru.get_state(r)
            post_com_state = state

            cost = utils.easy_cost(target, pre_com_state, post_com_state)
            total_cost += cost

            # if utils.check_data_useful(post_com_state):
            high_level_obs, high_level_delta_obs = utils.HL_obs(
                pre_com_state), utils.HL_delta_obs(pre_com_state, post_com_state)
            predict_state = high_level_planning.model_predict(
                high_level_obs, latent_action)

            position_tracking[0].append(state['base_pos_x'][0])
            position_tracking[1].append(state['base_pos_y'][0])
            position_tracking[2].append(state['base_ori_euler'][2])
            print(state['base_pos_x'][0], state['base_pos_y'][0])
            j += 1

            # switch to the next waypoint once the robot is within 0.2 m of the goal
            if np.linalg.norm(target[0:2] - np.array(
                    [state['base_pos_x'][0], state['base_pos_y'][0]])) < 0.2:
                print("Reach Goal %s!!!!" % str(target_index))
                np.save(
                    save_dir + '/' + args.high_level_policy_type +
                    '_square_test' + str(test_index) + '.npy',
                    np.array(position_tracking))
                target_index += 1
                if target_index == np.shape(square_circle_test)[0]:
                    np.save(
                        save_dir + '/' + args.high_level_policy_type +
                        '_square_test_cost_' + str(test_index) + '.npy',
                        np.array([total_cost]))
                    break
            if j > 500:
                print('Did not reach goal')
                break

        exp_variables['not_finish_one_iter'] = [0]
        ru.set_variables(r, exp_variables)

        input('wait for pos')
        exp_variables = ru.get_variables(r)
        state_record = exp_variables['record_pos']
        np.save(
            save_dir + '/' + 'state' + '_square_test' + str(test_index) + '.npy',
            np.array(state_record))

    # experiment ends
    exp_variables['finish_exp'] = [1]
    ru.set_variables(r, exp_variables)
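# --- Offline inspection sketch -------------------------------------------------
# The *_square_test*.npy files written above store the CoM track as three rows
# (x, y, yaw). A minimal x-y trajectory plot assuming that layout; the default
# path and 'NN' prefix are placeholder examples only.
def plot_square_test(path='trial_0/NN_square_test0.npy'):
    import matplotlib.pyplot as plt
    track = np.load(path)
    plt.plot(track[0], track[1], marker='.')
    plt.xlabel('x [m]')
    plt.ylabel('y [m]')
    plt.axis('equal')
    plt.title('CoM trajectory (square/circle waypoint test)')
    plt.show()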