def __init__(self, multi=False, multi_no=3, tracking=False):
    self.multigoal = multi
    self.n_goals = multi_no
    self.tracking = tracking
    self.rand = Randomizer()
    self.goal = None
    # self.step_in_episode = 0
    self.metadata = {'render.modes': ['human', 'rgb_array']}

    self.rhis = RandomPointInHalfSphere(0.0, 0.0369, 0.0437,
                                        radius=0.2022, height=0.2610,
                                        min_dist=0.1, halfsphere=True)

    # observation = 4 joints + 4 velocities + 2 coordinates for target
    joints_no = 4
    if self.tracking:
        joints_no = 3

    self.observation_space = spaces.Box(low=-1, high=1,
                                        shape=(joints_no + joints_no + 2,),
                                        dtype=np.float32)

    # action = 4 joint angles
    self.action_space = spaces.Box(low=-1, high=1,
                                   shape=(joints_no,),
                                   dtype=np.float32)

    self.diffs = [JOINT_LIMITS[i][1] - JOINT_LIMITS[i][0] for i in range(6)]

    self.cam = Camera(color=True)
    self.tracker = Tracker(TRACKING_GREEN["maxime_lower"], TRACKING_GREEN["maxime_upper"])
    # self.tracker = Tracker(TRACKING_GREEN["duct_lower"], TRACKING_GREEN["duct_upper"])
    if self.tracking:
        self.tracker_goal = Tracker(TRACKING_YELLOW["duckie_lower"], TRACKING_YELLOW["duckie_upper"])

    self.calibration = np.load(
        os.path.join(config_dir(), "calib-ergoreacher-adr.npz"))["calibration"].astype(np.int16)

    self.last_step_time = time.time()

    self.pts_tip = deque(maxlen=32)
    self.pts_goal = deque(maxlen=32)
    self.last_frame = np.ones((480, 640, 3), dtype=np.uint8)

    self.pause_counter = 0
    self.last_speed = 100
    self.goals_done = 0

    # self.goal_states = []
    # for _ in range(10000):
    #     goal = self.rhis.sampleSimplePoint()
    #     self.goal_states.append(self._goal2pixel(goal))

    self._init_robot()

    super().__init__()
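# Hedged sketch (not part of the original file): both spaces above are normalized
# to [-1, 1]. The mapping back to joint degrees mirrors the _denormalize helper of
# the fight environment further down; the limits used here are stand-in values,
# the real ones come from poppy_helpers' JOINT_LIMITS constant.
import numpy as np

EXAMPLE_LIMITS = [(-90, 90)] * 6
EXAMPLE_DIFFS = [hi - lo for (lo, hi) in EXAMPLE_LIMITS]

def denormalize_example(actions):
    """Map normalized actions in [-1, 1] to joint angles in degrees (sketch)."""
    out = []
    for i, a in enumerate(np.clip(actions, -1, 1)):
        shifted = (a + 1) / 2  # now in [0, 1]
        out.append(shifted * EXAMPLE_DIFFS[i] + EXAMPLE_LIMITS[i][0])
    return out

# Example: denormalize_example([0.0, 1.0, -1.0, 0.5, -0.5, 0.0])
# -> [0.0, 90.0, -90.0, 45.0, -45.0, 0.0]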
import time

import numpy as np
from pypot.robot import from_remote
from pytorch_a2c_ppo_acktr.inference import Inference

from poppy_helpers.controller import SwordFightController
from poppy_helpers.normalizer import Normalizer
from poppy_helpers.randomizer import Randomizer
from poppy_helpers.startups import startup_swordfight

controller_att, controller_def = startup_swordfight("flogo2", "flogo4")

# random defense
rand = Randomizer()
# rand_def = rand.random_sf()
# controller_def.goto_pos(rand_def)

norm = Normalizer()

inf = Inference("/home/florian/dev/pytorch-a2c-ppo-acktr/"
                "trained_models/ppo/"
                "ErgoFightStatic-Headless-Fencing-v0-180209225957.pt")

# ErgoFightStatic-Headless-Fencing-v0-180301140951.pt - very stabby, bit dumb
# ErgoFightStatic-Headless-Fencing-v0-180301140937.pt - pretty good, slightly stabby
# ErgoFightStatic-Headless-Fencing-v0-180301140520.pt - ultimate stabby policy
# ErgoFightStatic-Headless-Fencing-v0-180209225957.pt - aggressive lunge policy, good tracking, best policy
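# Hedged sketch (not part of the original file): the random defense above is left
# commented out. Re-enabling it could look like this, using only calls that appear
# in the other swordfight scripts in this repo (set_speed, random_sf, goto_pos);
# the speed value and the sleep duration are arbitrary choices.
# controller_def.set_speed(50)            # moderate speed so the move is not abrupt
# rand_def = rand.random_sf(scaling=1.0)  # random defender configuration
# controller_def.goto_pos(rand_def)
# time.sleep(1)                           # crude wait for the motion to finish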
import zmq
import numpy as np
from poppy_helpers.randomizer import Randomizer

ROBOT1 = "flogo2.local"
PORT = 5757

context = zmq.Context()
socket = context.socket(zmq.PAIR)

print("Connecting to server...")
socket.connect("tcp://{}:{}".format(ROBOT1, PORT))
print("Connected.")

observations = []
rand = Randomizer()

for i in range(1000):
    req = {"robot": {"get_pos_speed": {}}}
    socket.send_json(req)
    answer = socket.recv_json()
    observations.append(answer)

    # every 100 steps, command a random pose
    if i % 100 == 0:
        req = {"robot": {"set_pos": {"positions": rand.random_sf(scaling=1.0)}}}
        socket.send_json(req)  # assumed continuation: dispatch the random pose
        socket.recv_json()     # and consume the server's reply
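# Hedged sketch (not part of the original file): the collected replies could be
# persisted for later analysis before shutting the connection down. The file name
# is arbitrary and the reply structure is whatever the robot server returned.
import json

with open("observations.json", "w") as f:
    json.dump(observations, f)

socket.close()
context.term()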
import tkinter as tk
import time

import numpy as np
from pytorch_a2c_ppo_acktr.inference import Inference

from poppy_helpers.normalizer import Normalizer
from poppy_helpers.randomizer import Randomizer
from poppy_helpers.startups import startup_swordfight
from poppy_helpers.sound import RemoteSound

SCALING = 5  # how many hundred

controller_att, controller_def = startup_swordfight("flogo2", "flogo4")

rand = Randomizer()
norm = Normalizer()

inf = Inference("/home/florian/dev/pytorch-a2c-ppo-acktr/"
                "trained_models/ppo/"
                "ErgoFightStatic-Headless-Fencing-v0-180209225957.pt")

snd = RemoteSound("flogo4.local")

SOUND_EFFECT = "beep1"
# SOUND_EFFECT = "Wilhelm_Scream"


def get_observation():
    norm_pos_att = norm.normalize_pos(controller_att.get_pos_comp())
    norm_pos_att[0] *= -1
    norm_vel_att = norm.normalize_vel(controller_att.get_current_speed())
    norm_pos_def = norm.normalize_pos(controller_def.get_pos_comp())
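# Hedged sketch (not part of the original file): get_observation() is cut off above.
# Its tail presumably mirrors the attacker half and stacks everything into the
# 24-dimensional vector the fencing policy expects (attacker pos/vel, then defender
# pos/vel, as in the fight environment's _self_observe). Sketch only:
def get_observation_sketch():
    norm_pos_att = norm.normalize_pos(controller_att.get_pos_comp())
    norm_pos_att[0] *= -1
    norm_vel_att = norm.normalize_vel(controller_att.get_current_speed())
    norm_pos_def = norm.normalize_pos(controller_def.get_pos_comp())
    norm_vel_def = norm.normalize_vel(controller_def.get_current_speed())
    return np.hstack((norm_pos_att, norm_vel_att,
                      norm_pos_def, norm_vel_def)).astype("float32")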
import time

from pypot.robot import from_remote

from poppy_helpers.constants import SWORDFIGHT_REST_DEF, SWORDFIGHT_REST_ATT
from poppy_helpers.controller import Controller, SwordFightController
from poppy_helpers.randomizer import Randomizer

poppy_def = from_remote('flogo4.local', 4242)
poppy_att = from_remote('flogo2.local', 4242)

controller_def = SwordFightController(poppy_def, mode="def")
controller_att = SwordFightController(poppy_att, mode="att")

rand = Randomizer()

controller_def.rest()
controller_att.rest()

rand_def = rand.random_sf()
rand_att = rand.random_sf()
print(rand_def, rand_att)

controller_att.set_speed(50)
controller_def.set_speed(50)

controller_def.goto_pos(rand_def)
controller_att.goto_pos(rand_att)

for _ in range(20):
    pos_left = controller_def.get_pos()
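# Hedged sketch (not part of the original file): the environment class below still
# uses fixed time.sleep() calls after commanding a pose (see its FIXME comments).
# A polling helper built only on get_pos(), as used above, could replace those
# sleeps; the tolerance, timeout, and poll interval here are arbitrary choices.
def wait_until_reached(controller, target, tol=2.0, timeout=2.0, poll=0.02):
    """Block until the reported joints are within tol degrees of target, or time out."""
    deadline = time.time() + timeout
    while time.time() < deadline:
        pos = controller.get_pos()
        if all(abs(p - t) <= tol for p, t in zip(pos, target)):
            return True
        time.sleep(poll)
    return False

# Example: block until the defender has (roughly) reached its random pose.
# wait_until_reached(controller_def, rand_def)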
class ErgoFightLiveEnv(gym.Env):
    def __init__(self, no_move=False, scaling=1, shield=True, sim=False, compensation=True):
        self.no_move = no_move
        self.scaling = scaling
        self.shield = shield
        self.sim = sim
        self.compensation = compensation

        self.rand = Randomizer()
        self.last_step_time = None
        self.step_in_episode = 0
        self.step_in_fight = 0

        self.metadata = {'render.modes': ['human', 'rgb_array']}

        # 6 own joint pos, 6 own joint vel, 6 enemy joint pos, 6 enemy joint vel
        self.observation_space = spaces.Box(low=-1, high=1, shape=(6 + 6 + 6 + 6,), dtype=np.float32)
        self.action_space = spaces.Box(low=-1, high=1, shape=(6,), dtype=np.float32)

        self.diffs = [JOINT_LIMITS[i][1] - JOINT_LIMITS[i][0] for i in range(6)]

        self._init_robots()

    def _init_robots(self):
        if self.compensation:
            self.controller_def = SwordFightZMQController(mode="def", host="flogo4.local")
            self.controller_att = SwordFightZMQController(mode="att", host="flogo2.local")
        else:
            self.controller_def = ZMQController(host="flogo4.local")
            self.controller_att = ZMQController(host="flogo2.local")

        self._setSpeedCompliance()
        self.controller_def.get_keys()  # in case there are keys stored

    def _setSpeedCompliance(self):
        self.controller_def.compliant(False)
        self.controller_att.compliant(False)
        self.controller_att.set_max_speed(100)
        self.controller_def.set_max_speed(100)

    def _seed(self, seed=None):
        np.random.seed(seed)

    def _restPos(self):
        self.done = False

        self._setSpeedCompliance()

        self.controller_def.safe_rest()
        self.controller_att.safe_rest()

        time.sleep(.6)  # FIXME: run loop, check joints

        self.randomize(robot=1, scaling=self.scaling)

        time.sleep(1)  # FIXME: instead run a loop here and check when
        # the joints are close to the given configuration

        self.controller_def.get_keys()  # clear queue

    def randomize(self, robot=1, scaling=1.0):
        new_pos = self.rand.random_sf(scaling)
        robot_ctrl = self.controller_att
        if robot == 1:
            robot_ctrl = self.controller_def
        robot_ctrl.goto_pos(new_pos)

    def _reset(self):
        self.step_in_episode = 0
        self._restPos()
        self._self_observe()
        self.last_step_time = time.time()
        return self.observation

    def _get_reward(self):
        collisions = self.controller_def.get_keys()

        reward = 0
        if "s" in collisions:
            reward = 1
            self._restPos()

        return reward

    def _self_observe(self):
        joint_vel_att = self.controller_att.get_posvel()
        joint_vel_def = self.controller_def.get_posvel()

        # self.observation = ((self._normalize(  ### OLD AND WRONG IMPL
        #     np.hstack((joint_vel_att, joint_vel_def)).astype('float32')
        # )), )

        self.observation = ((  # this should be the technically correct solution
            np.hstack((self._normalize(joint_vel_att),
                       self._normalize(joint_vel_def)))), )

    def _normalize(self, pos):
        pos = np.array(pos).astype('float32')
        out = []
        for i in range(6):
            shifted = (pos[i] - JOINT_LIMITS[i][0]) / self.diffs[i]  # now it's in [0, 1]
            norm = shifted * 2 - 1
            if i == 0 and self.sim:
                norm *= -1  # between sim and real the first joint is inverted
            out.append(norm)

        if len(pos) > 6:
            shifted = (pos[6:] + JOINT_LIMITS_SPEED) / (JOINT_LIMITS_SPEED * 2)
            norm = shifted * 2 - 1
            if self.sim:
                norm[0] *= -1  # between sim and real the first joint is inverted (probably both pos & vel)
            out += list(norm)

        return out

    def _denormalize(self, actions):
        out = []
        if self.sim:
            actions[0] *= -1
        for i in range(6):
            shifted = (actions[i] + 1) / 2  # now it's within [0, 1]
            denorm = shifted * self.diffs[i] + JOINT_LIMITS[i][0]
            out.append(denorm)

        return out

    def prep_actions(self, actions):
        actions = np.clip(actions, -1, 1)  # first make sure actions are normalized
        actions = self._denormalize(actions)  # then scale them to the actual joint angles
        return actions

    def _step(self, actions):
        self.step_in_episode += 1

        actions = self.prep_actions(actions)
        self.controller_att.goto_pos(actions)

        if not self.no_move:
            if self.step_in_episode % MOVE_EVERY_N_STEPS == 0:
                # print("step {}, randomizing".format(self.step_in_episode))
                self.randomize(1, scaling=self.scaling)

        # observe again
        self._self_observe()

        reward = self._get_reward()

        dt = (time.time() - self.last_step_time) * 1000
        if dt < MAX_REFRESHRATE:
            time.sleep((MAX_REFRESHRATE - dt) / 1000)
        self.last_step_time = time.time()

        return self.observation, reward, self.done, {}

    def _test_second_robot(self, actions):
        actions = self.prep_actions(actions)
        self.controller_def.goto_pos(actions)

    def _render(self, mode='human', close=False):
        # This intentionally does nothing and is only here for wrapper functions.
        # If you want graphical output, use the environments
        # "ErgoBallThrowAirtime-Graphical-Normalized-v0"
        # or
        # "ErgoBallThrowAirtime-Graphical-v0"
        # ... not the ones with "...-Headless-...".
        pass
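# Hedged usage sketch (not part of the original file): a random-action rollout
# against ErgoFightLiveEnv, using only the methods defined above (_reset, _step,
# action_space.sample()). Assumes the flogo2/flogo4 robots are reachable; episode
# and step counts are arbitrary.
if __name__ == '__main__':
    env = ErgoFightLiveEnv(no_move=False, scaling=1)

    for episode in range(3):
        obs = env._reset()
        total_reward = 0
        for _ in range(100):
            action = env.action_space.sample()  # random normalized action in [-1, 1]^6
            obs, reward, done, info = env._step(action)
            total_reward += reward
        print("episode {}: reward {}".format(episode, total_reward))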