Example #1
def test_rendering():
    model = load_model_from_xml(BASIC_MODEL_XML)
    sim = MjSim(model)
    sim.forward()

    img, depth = sim.render(200, 200, depth=True)
    assert img.shape == (200, 200, 3)
    compare_imgs(img, 'test_rendering.freecam.png')

    depth = (depth - np.min(depth)) / (np.max(depth) - np.min(depth))
    depth = np.asarray(depth * 255, dtype=np.uint8)
    assert depth.shape == (200, 200)
    compare_imgs(depth, 'test_rendering.freecam.depth.png')

    img = sim.render(100, 100, camera_name="camera1")
    assert img.shape == (100, 100, 3)
    compare_imgs(img, 'test_rendering.camera1.png')

    img = sim.render(200, 100, camera_name="camera1")
    assert img.shape == (100, 200, 3)
    compare_imgs(img, 'test_rendering.camera1.narrow.png')

    render_context = sim.render_contexts[0]
    render_context.add_marker(size=np.array([.4, .5, .6]),
                              pos=np.array([.4, .5, .6]),
                              rgba=np.array([.7, .8, .9, 1.0]),
                              label="mark")
    img = sim.render(200, 200, camera_name="camera1")
    assert img.shape == (200, 200, 3)
    compare_imgs(img, 'test_rendering_markers.camera1.png')
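These tests rely on a compare_imgs helper that is not shown in the listing. A minimal sketch of what such a golden-image comparison might look like (hypothetical; not mujoco-py's actual helper, and the threshold is an assumption):

import os

import numpy as np
from PIL import Image


def compare_imgs(img, ref_path, max_mean_diff=10.0):
    # Hypothetical golden-image check: create the reference on first run,
    # otherwise assert the mean absolute pixel difference stays small.
    if not os.path.exists(ref_path):
        Image.fromarray(img).save(ref_path)
        return
    ref = np.asarray(Image.open(ref_path), dtype=np.float64)
    diff = np.abs(ref - img.astype(np.float64)).mean()
    assert diff < max_mean_diff, 'mean pixel diff %.2f >= %.2f' % (diff, max_mean_diff)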
Example #2
def test_rendering():
    model = load_model_from_xml(BASIC_MODEL_XML)
    sim = MjSim(model)
    sim.forward()

    img, depth = sim.render(200, 200, depth=True)
    assert img.shape == (200, 200, 3)
    compare_imgs(img, 'test_rendering.freecam.png')

    depth = (depth - np.min(depth)) / (np.max(depth) - np.min(depth))
    depth = np.asarray(depth * 255, dtype=np.uint8)
    assert depth.shape == (200, 200)

    # Unfortunately, MuJoCo 2.0 renders a slightly different depth image on macOS than on Linux here
    if "darwin" in sys.platform.lower():
        compare_imgs(depth, 'test_rendering.freecam.depth-darwin.png')
    else:
        compare_imgs(depth, 'test_rendering.freecam.depth.png')

    img = sim.render(100, 100, camera_name="camera1")
    assert img.shape == (100, 100, 3)
    compare_imgs(img, 'test_rendering.camera1.png')

    img = sim.render(200, 100, camera_name="camera1")
    assert img.shape == (100, 200, 3)
    compare_imgs(img, 'test_rendering.camera1.narrow.png')

    render_context = sim.render_contexts[0]
    render_context.add_marker(size=np.array([.4, .5, .6]),
                              pos=np.array([.4, .5, .6]),
                              rgba=np.array([.7, .8, .9, 1.0]),
                              label="mark")
    img = sim.render(200, 200, camera_name="camera1")
    assert img.shape == (200, 200, 3)
    compare_imgs(img, 'test_rendering_markers.camera1.png')
Example #3
def test_rendering():
    model = load_model_from_xml(BASIC_MODEL_XML)
    sim = MjSim(model)
    sim.forward()

    img, depth = sim.render(200, 200, depth=True)
    assert img.shape == (200, 200, 3)
    compare_imgs(img, 'test_rendering.freecam.png')

    depth = (depth - np.min(depth)) / (np.max(depth) - np.min(depth))
    depth = np.asarray(depth * 255, dtype=np.uint8)
    assert depth.shape == (200, 200)
    compare_imgs(depth, 'test_rendering.freecam.depth.png')

    img = sim.render(100, 100, camera_name="camera1")
    assert img.shape == (100, 100, 3)
    compare_imgs(img, 'test_rendering.camera1.png')

    img = sim.render(200, 100, camera_name="camera1")
    assert img.shape == (100, 200, 3)
    compare_imgs(img, 'test_rendering.camera1.narrow.png')

    render_context = sim.render_contexts[0]
    render_context.add_marker(size=np.array([.4, .5, .6]),
                              pos=np.array([.4, .5, .6]),
                              rgba=np.array([.7, .8, .9, 1.0]),
                              label="mark")
    img = sim.render(200, 200, camera_name="camera1")
    assert img.shape == (200, 200, 3)
    compare_imgs(img, 'test_rendering_markers.camera1.png')
Example #4
def test_concurrent_rendering():
    '''Best-effort testing that concurrent multi-threaded rendering works.
    The test has no guarantees around being deterministic, but if it fails
    you know something is wrong with concurrent rendering. If it passes,
    things are probably working.'''
    err = None
    def func(sim, event):
        event.wait()
        sim.data.qpos[:] = 0.0
        sim.forward()
        img1 = sim.render(width=40, height=40, camera_name="camera1")
        img2 = sim.render(width=40, height=40, camera_name="camera2")
        try:
            assert np.sum(img1[:]) == 23255
            assert np.sum(img2[:]) == 12007
        except Exception as e:
            nonlocal err
            err = e

    model = load_model_from_xml(BASIC_MODEL_XML)
    sim = MjSim(model)
    sim.render(100, 100)
    event = Event()
    threads = []
    for _ in range(100):
        thread = Thread(target=func, args=(sim, event))
        threads.append(thread)
        thread.start()
    event.set()
    for thread in threads:
        thread.join()
    assert err is None, "Exception: %s" % (str(err))
Example #5
def test_concurrent_rendering():
    '''Best-effort testing that concurrent multi-threaded rendering works.
    The test has no guarantees around being deterministic, but if it fails
    you know something is wrong with concurrent rendering. If it passes,
    things are probably working.'''
    err = None

    def func(sim, event):
        event.wait()
        sim.data.qpos[:] = 0.0
        sim.forward()
        img1 = sim.render(width=40, height=40, camera_name="camera1")
        img2 = sim.render(width=40, height=40, camera_name="camera2")
        try:
            assert np.sum(img1[:]) == 23255
            assert np.sum(img2[:]) == 12007
        except Exception as e:
            nonlocal err
            err = e

    model = load_model_from_xml(BASIC_MODEL_XML)
    sim = MjSim(model)
    sim.render(100, 100)
    event = Event()
    threads = []
    for _ in range(100):
        thread = Thread(target=func, args=(sim, event))
        threads.append(thread)
        thread.start()
    event.set()
    for thread in threads:
        thread.join()
    assert err is None, "Exception: %s" % (str(err))
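The exact pixel sums asserted above (23255 and 12007) are tied to one particular renderer and driver. A hedged variation (an assumption, not part of the original test) is to render a single-threaded baseline first and compare every thread's output against it:

from threading import Event, Thread

import numpy as np


def test_concurrent_rendering_against_baseline(sim):
    # Hypothetical variant: compare threaded renders against a baseline frame
    # produced on the main thread instead of hard-coded pixel sums.
    sim.data.qpos[:] = 0.0
    sim.forward()
    baseline = sim.render(width=40, height=40, camera_name="camera1").copy()

    err = None
    event = Event()

    def func():
        nonlocal err
        event.wait()
        img = sim.render(width=40, height=40, camera_name="camera1")
        try:
            assert np.array_equal(img, baseline)
        except Exception as e:
            err = e

    threads = [Thread(target=func) for _ in range(16)]
    for thread in threads:
        thread.start()
    event.set()
    for thread in threads:
        thread.join()
    assert err is None, "Exception: %s" % str(err)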
Example #6
def test_rendering():
    model = load_model_from_xml(BASIC_MODEL_XML)
    sim = MjSim(model)
    sim.forward()

    img, depth = sim.render(200, 200, depth=True)
    assert img.shape == (200, 200, 3)
    compare_imgs(img, 'test_rendering.freecam.png')

    depth = (depth - np.min(depth)) / (np.max(depth) - np.min(depth))
    depth = np.asarray(depth * 255, dtype=np.uint8)
    assert depth.shape == (200, 200)

    # Unfortunately, MuJoCo 2.0 renders a slightly different depth image on macOS than on Linux here
    if "darwin" in sys.platform.lower():
        compare_imgs(depth, 'test_rendering.freecam.depth-darwin.png')
    else:
        compare_imgs(depth, 'test_rendering.freecam.depth.png')

    img = sim.render(100, 100, camera_name="camera1")
    assert img.shape == (100, 100, 3)
    compare_imgs(img, 'test_rendering.camera1.png')

    img = sim.render(200, 100, camera_name="camera1")
    assert img.shape == (100, 200, 3)
    compare_imgs(img, 'test_rendering.camera1.narrow.png')

    render_context = sim.render_contexts[0]
    render_context.add_marker(size=np.array([.4, .5, .6]),
                              pos=np.array([.4, .5, .6]),
                              rgba=np.array([.7, .8, .9, 1.0]),
                              label="mark")
    img = sim.render(200, 200, camera_name="camera1")
    assert img.shape == (200, 200, 3)
    compare_imgs(img, 'test_rendering_markers.camera1.png')
Example #7
def test_multiple_sims():
    # Ensure that creating new simulators still produces good renderings.
    xml = """
    <mujoco>
        <asset>
            <texture name="t1" width="32" height="32" type="2d" builtin="flat" />
            <material name="m1" texture="t1" />
        </asset>
        <worldbody>
            <light diffuse=".5 .5 .5" pos="0 0 5" dir="0 0 -1" />
            <camera name="topcam" pos="0 0 2.5" zaxis="0 0 1" />
            <geom name="g1" pos="0 0 0" type="box" size="1 1 0.1" rgba="1 1 1 1" material="m1" />
        </worldbody>
    </mujoco>
    """

    model = load_model_from_xml(xml)
    random_state = np.random.RandomState(0)

    for i in range(3):
        sim = MjSim(model)
        sim.forward()
        modder = TextureModder(sim, random_state=random_state)
        for j in range(2):
            modder.rand_checker('g1')
            compare_imgs(sim.render(201, 205, camera_name="topcam"),
                         'test_multiple_sims.loop%d_%d.png' % (i, j))
Example #8
def test_multiple_sims():
    # Ensure that creating new simulators still produces good renderings.
    xml = """
    <mujoco>
        <asset>
            <texture name="t1" width="32" height="32" type="2d" builtin="flat" />
            <material name="m1" texture="t1" />
        </asset>
        <worldbody>
            <light diffuse=".5 .5 .5" pos="0 0 5" dir="0 0 -1" />
            <camera name="topcam" pos="0 0 2.5" zaxis="0 0 1" />
            <geom name="g1" pos="0 0 0" type="box" size="1 1 0.1" rgba="1 1 1 1" material="m1" />
        </worldbody>
    </mujoco>
    """

    model = load_model_from_xml(xml)
    random_state = np.random.RandomState(0)

    for i in range(3):
        sim = MjSim(model)
        sim.forward()
        modder = TextureModder(sim, random_state=random_state)
        for j in range(2):
            modder.rand_checker('g1')
            compare_imgs(
                sim.render(201, 205, camera_name="topcam"),
                'test_multiple_sims.loop%d_%d.png' % (i, j))
Example #9
def _render_depth(sim: mujoco_py.MjSim, camera: str, render_height: int,
                  render_width: int, world_xml: ET.Element):
    # take screenshot
    frame = sim.render(render_width * 2,
                       render_height,
                       camera_name=camera,
                       depth=True)
    return frame[1]  # depth buffer
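The depth buffer returned by sim.render(..., depth=True) is in normalized units, not meters. A common conversion to metric depth (a sketch assuming the buffer follows the usual OpenGL convention, using the model's znear/zfar visualization settings already referenced elsewhere in these examples) is:

import numpy as np


def depth_to_meters(sim, depth):
    # Map a normalized depth buffer to metric depth using the near/far planes
    # defined by the model (vis.map.znear/zfar are scaled by stat.extent).
    extent = sim.model.stat.extent
    near = sim.model.vis.map.znear * extent
    far = sim.model.vis.map.zfar * extent
    return near / (1.0 - depth * (1.0 - near / far))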
Example #10
def test_high_res():
    model = load_model_from_xml(BASIC_MODEL_XML)
    sim = MjSim(model)
    sim.forward()
    img = sim.render(1000, 1000)
    img = scipy.misc.imresize(img, (200, 200, 3))
    assert img.shape == (200, 200, 3)
    compare_imgs(img, 'test_rendering.freecam.png')
Example #11
def test_high_res():
    model = load_model_from_xml(BASIC_MODEL_XML)
    sim = MjSim(model)
    sim.forward()
    img = sim.render(1000, 1000)
    img = np.array(Image.fromarray(img).resize(size=(200, 200)))
    assert img.shape == (200, 200, 3)
    compare_imgs(img, 'test_rendering.freecam.png')
Example #12
def test_rendering_failing():
    model = load_model_from_xml(BASIC_MODEL_XML)
    sim = MjSim(model)
    sim.forward()
    sim.render(100, 100)
    render_context = sim.render_contexts[0]
    render_context.add_marker(size=np.array([.4, .5, .6]),
                              pos=np.array([.4, .5, .6]),
                              rgba=np.array([.7, .8, .9, 1.0]),
                              label="blaaaa")
    img = sim.render(200, 200, camera_name="camera1")
    assert img.shape == (200, 200, 3)
    # compare_imgs should fail here because the marker label differs from the
    # reference image. Track whether it raised and assert afterwards, so a
    # missing failure is not swallowed by the except block.
    raised = False
    try:
        compare_imgs(img, 'test_rendering_markers.camera1.png')
    except Exception:
        raised = True
    assert raised
Example #13
def test_rendering_failing():
    model = load_model_from_xml(BASIC_MODEL_XML)
    sim = MjSim(model)
    sim.forward()
    sim.render(100, 100)
    render_context = sim.render_contexts[0]
    render_context.add_marker(size=np.array([.4, .5, .6]),
                              pos=np.array([.4, .5, .6]),
                              rgba=np.array([.7, .8, .9, 1.0]),
                              label="blaaaa")
    img = sim.render(200, 200, camera_name="camera1")
    assert img.shape == (200, 200, 3)
    # compare_imgs should fail here because the marker label differs from the
    # reference image. Track whether it raised and assert afterwards, so a
    # missing failure is not swallowed by the except block.
    raised = False
    try:
        compare_imgs(img, 'test_rendering_markers.camera1.png')
    except Exception:
        raised = True
    assert raised
Example #14
def test_high_res():
    model = load_model_from_xml(BASIC_MODEL_XML)
    sim = MjSim(model)
    sim.forward()
    img = sim.render(1000, 1000)
    img = scipy.misc.imresize(img, (200, 200, 3))
    assert img.shape == (200, 200, 3)
    compare_imgs(img, 'test_rendering.freecam.png')
Example #15
def test_high_res():
    model = load_model_from_xml(BASIC_MODEL_XML)
    sim = MjSim(model)
    sim.forward()
    img = sim.render(1000, 1000)
    img = np.array(Image.fromarray(img).resize(size=(200, 200)))
    assert img.shape == (200, 200, 3)
    compare_imgs(img, 'test_rendering.freecam.png')
Example #16
def setup_sim(device_id):
    model = load_model_from_path("xmls/fetch/main.xml")
    sim = MjSim(model)

    image = sim.render(IMAGE_WIDTH, IMAGE_HEIGHT, device_id=device_id)
    assert image.shape == (IMAGE_HEIGHT, IMAGE_WIDTH, 3)

    return sim
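device_id selects which GPU performs the offscreen render. A short usage sketch (hypothetical; assumes IMAGE_WIDTH and IMAGE_HEIGHT are defined as above and that two GPUs are visible):

# Hypothetical: one simulator per GPU for parallel offscreen rendering.
sims = [setup_sim(device_id=gpu) for gpu in range(2)]
images = [s.render(IMAGE_WIDTH, IMAGE_HEIGHT, device_id=gpu)
          for gpu, s in enumerate(sims)]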
Example #17
def test_materials():
    model = load_model_from_xml(BASIC_MODEL_XML)
    sim = MjSim(model)
    sim.forward()

    compare_imgs(sim.render(201, 205, camera_name="topcam"),
                 'test_materials.premod.png')

    random_state = np.random.RandomState(0)
    modder = MaterialModder(sim, random_state=random_state)

    modder.set_specularity('g1', 1.0)
    modder.set_reflectance('g2', 1.0)
    modder.set_shininess('g3', 1.0)
    compare_imgs(sim.render(201, 205, camera_name="topcam"),
                 'test_materials.props.png')

    modder.rand_all('g4')
    compare_imgs(sim.render(201, 205, camera_name="topcam"),
                 'test_materials.rand_all.png')
Example #18
def test_materials():
    model = load_model_from_xml(BASIC_MODEL_XML)
    sim = MjSim(model)
    sim.forward()

    compare_imgs(sim.render(201, 205, camera_name="topcam"),
                 'test_materials.premod.png')

    random_state = np.random.RandomState(0)
    modder = MaterialModder(sim, random_state=random_state)

    modder.set_specularity('g1', 1.0)
    modder.set_reflectance('g2', 1.0)
    modder.set_shininess('g3', 1.0)
    compare_imgs(sim.render(201, 205, camera_name="topcam"),
                 'test_materials.props.png')

    modder.rand_all('g4')
    compare_imgs(sim.render(201, 205, camera_name="topcam"),
                 'test_materials.rand_all.png')
Example #19
def _render_seg(sim: mujoco_py.MjSim, camera: str, render_height: int, render_width: int, world_xml: ET.Element):
  lm = LightModder(sim)
  # switch all lights off
  light_names = [l.attrib['name'] for l in world_xml.findall(".//light")]
  for light_name in light_names:
    lm.set_active(light_name, 0)
  # take screenshot
  frame = sim.render(render_width * 2, render_height, camera_name=camera)
  # reset lights
  for light_name in light_names:
    lm.set_active(light_name, 1)
  return frame
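A usage sketch for _render_seg (hypothetical; the XML path and camera name are placeholders, and world_xml is assumed to be the parsed <worldbody> element of the same XML the model was loaded from):

import xml.etree.ElementTree as ET

import mujoco_py

model_path = "xmls/lab_env.xml"  # placeholder path
sim = mujoco_py.MjSim(mujoco_py.load_model_from_path(model_path))
world_xml = ET.parse(model_path).getroot().find("worldbody")
seg = _render_seg(sim, camera="workbench_camera", render_height=240,
                  render_width=320, world_xml=world_xml)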
Example #20
def test_glfw_context():
    model = load_model_from_xml(BASIC_MODEL_XML)
    sim = MjSim(model)
    sim.forward()

    render_context = MjRenderContext(sim, offscreen=True, opengl_backend='glfw')
    assert len(sim.render_contexts) == 1
    assert sim.render_contexts[0] is render_context
    assert isinstance(render_context.opengl_context, GlfwContext)

    compare_imgs(sim.render(201, 205, camera_name="topcam"), 'test_glfw_context.png')
    assert len(sim.render_contexts) == 1
    assert sim.render_contexts[0] is render_context
Example #21
def test_glfw_context():
    model = load_model_from_xml(BASIC_MODEL_XML)
    sim = MjSim(model)
    sim.forward()

    render_context = MjRenderContext(sim, offscreen=True, opengl_backend='glfw')
    assert len(sim.render_contexts) == 1
    assert sim.render_contexts[0] is render_context
    assert isinstance(render_context.opengl_context, GlfwContext)

    compare_imgs(sim.render(201, 205, camera_name="topcam"), 'test_glfw_context.png')
    assert len(sim.render_contexts) == 1
    assert sim.render_contexts[0] is render_context
Example #22
def _render_rgb(sim: mujoco_py.MjSim, camera: str, render_height: int,
                render_width: int, world_xml: ET.Element):
    lm = LightModder(sim)
    cam_names = [c.attrib['name'] for c in world_xml.findall(".//camera")]
    # turn the lights off for all cameras except the one being recorded
    for cam in cam_names:
        light_name = _get_cam_light_name(cam)
        if light_name in sim.model.light_names:
            lm.set_active(light_name, 1 if cam == camera else 0)
    # take screenshot
    frame = sim.render(render_width * 2, render_height, camera_name=camera)
    # reset camera lights
    for cam in cam_names:
        light_name = _get_cam_light_name(cam)
        if light_name in sim.model.light_names:
            lm.set_active(light_name, 1)
    return frame
Example #23
class SimWorker(object):
    def __init__(self):
        print('created worker')

    def create_sim(self):
        self.sim = MjSim(
            load_model_from_path(
                '/mount/visual_mpc/mjc_models/cartgripper_noautogen.xml'))
        # self.sim = MjSim(load_model_from_path('/mnt/sda1/visual_mpc/mjc_models/cartgripper_noautogen.xml'))

    def run_sim(self):
        print('startsim')
        images = []
        for i in range(5):
            self.sim.step()
            images.append(self.sim.render(100, 100))
        return images
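A local usage sketch for SimWorker (the class looks intended to be driven remotely, e.g. through an RPC or multiprocessing proxy, but nothing below depends on that; the model path inside create_sim is whatever the surrounding project provides):

worker = SimWorker()
worker.create_sim()        # loads the cartgripper model from the hard-coded path
frames = worker.run_sim()  # five 100x100x3 renders, one per sim step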
Example #24
def test_textures():
    model = load_model_from_xml(BASIC_MODEL_XML)
    sim = MjSim(model)
    sim.forward()

    compare_imgs(sim.render(201, 205, camera_name="topcam"),
                 'test_textures.premod.png')

    random_state = np.random.RandomState(0)
    modder = TextureModder(sim, random_state=random_state)
    modder.whiten_materials()
    modder.whiten_materials(['g1', 'g2'])

    modder.set_rgb('g1', (255, 0, 0))
    modder.set_rgb('g2', (0, 255, 0))
    modder.set_rgb('g3', (0, 0, 255))
    modder.set_rgb('g4', (255, 0, 255))
    compare_imgs(sim.render(201, 205, camera_name="topcam"),
                 'test_textures.rgb.png')

    modder.set_checker('g1', (255, 0, 0), (0, 255, 0))
    modder.set_gradient('g2', (0, 255, 0), (0, 0, 255), vertical=True)
    modder.set_gradient('g3', (255, 255, 0), (0, 0, 255), vertical=False)
    modder.set_noise('g4', (0, 0, 255), (255, 0, 0), 0.1)
    compare_imgs(sim.render(201, 205, camera_name="topcam"),
                 'test_textures.variety.png')

    modder.rand_checker('g1')
    modder.rand_gradient('g2')
    modder.rand_noise('g3')
    modder.rand_rgb('g4')
    compare_imgs(sim.render(201, 205, camera_name="topcam"),
                 'test_textures.rand_specific.png')

    modder.rand_all('g1')
    modder.rand_all('g2')
    modder.rand_all('g3')
    modder.rand_all('g4')
    compare_imgs(sim.render(201, 205, camera_name="topcam"),
                 'test_textures.rand_all.png')

    modder.rand_checker('g1')
    modder.rand_checker('g2')
    modder.rand_checker('g3')
    modder.rand_checker('g4')
    mat_modder = MaterialModder(sim, random_state=random_state)
    mat_modder.rand_texrepeat('g1')
    mat_modder.rand_texrepeat('g2')
    mat_modder.rand_texrepeat('g3')
    mat_modder.rand_texrepeat('g4')
    compare_imgs(sim.render(201, 205, camera_name="topcam"),
                 'test_textures.rand_texrepeat.png')
Example #25
def test_textures():
    model = load_model_from_xml(BASIC_MODEL_XML)
    sim = MjSim(model)
    sim.forward()

    compare_imgs(sim.render(201, 205, camera_name="topcam"),
                 'test_textures.premod.png')

    random_state = np.random.RandomState(0)
    modder = TextureModder(sim, random_state=random_state)
    modder.whiten_materials()
    modder.whiten_materials(['g1', 'g2'])

    modder.set_rgb('g1', (255, 0, 0))
    modder.set_rgb('g2', (0, 255, 0))
    modder.set_rgb('g3', (0, 0, 255))
    modder.set_rgb('g4', (255, 0, 255))
    compare_imgs(sim.render(201, 205, camera_name="topcam"),
                 'test_textures.rgb.png')

    modder.set_checker('g1', (255, 0, 0), (0, 255, 0))
    modder.set_gradient('g2', (0, 255, 0), (0, 0, 255), vertical=True)
    modder.set_gradient('g3', (255, 255, 0), (0, 0, 255), vertical=False)
    modder.set_noise('g4', (0, 0, 255), (255, 0, 0), 0.1)
    compare_imgs(sim.render(201, 205, camera_name="topcam"),
                 'test_textures.variety.png')

    modder.rand_checker('g1')
    modder.rand_gradient('g2')
    modder.rand_noise('g3')
    modder.rand_rgb('g4')
    compare_imgs(sim.render(201, 205, camera_name="topcam"),
                 'test_textures.rand_specific.png')

    modder.rand_all('g1')
    modder.rand_all('g2')
    modder.rand_all('g3')
    modder.rand_all('g4')
    compare_imgs(sim.render(201, 205, camera_name="topcam"),
                 'test_textures.rand_all.png')

    modder.rand_checker('g1')
    modder.rand_checker('g2')
    modder.rand_checker('g3')
    modder.rand_checker('g4')
    mat_modder = MaterialModder(sim, random_state=random_state)
    mat_modder.rand_texrepeat('g1')
    mat_modder.rand_texrepeat('g2')
    mat_modder.rand_texrepeat('g3')
    mat_modder.rand_texrepeat('g4')
    compare_imgs(sim.render(201, 205, camera_name="topcam"),
                 'test_textures.rand_texrepeat.png')
Example #26
class Mujoco_Renderer():
    def __init__(self, mujoco_xml, hp):
        from mujoco_py import load_model_from_path, MjSim
        mujoco_xml = '/'.join(str.split(gcp.__file__, '/')[:-1]) + '/' + mujoco_xml
        self.sim = MjSim(load_model_from_path(mujoco_xml))
        self._hp = hp

    def render(self, qpos):
        sim_state = self.sim.get_state()
        sim_state.qpos[:2] = qpos
        sim_state.qvel[:] = np.zeros_like(self.sim.data.qvel)
        self.sim.set_state(sim_state)
        self.sim.forward()
        
        subgoal_image = self.sim.render(self._hp.mpar.img_sz, self._hp.mpar.img_sz, camera_name='maincam')
        # plt.imshow(subgoal_image)
        # plt.savefig('test.png')
        return subgoal_image
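A usage sketch for Mujoco_Renderer (hypothetical; the XML path is illustrative and resolved relative to the gcp package by the constructor, and the hp object is a stand-in exposing the mpar.img_sz field the class expects):

import numpy as np


class _MPar:
    img_sz = 64


class _HP:
    mpar = _MPar()


renderer = Mujoco_Renderer('mjc_models/maze.xml', _HP())  # path is illustrative
goal_img = renderer.render(np.array([0.1, -0.2]))         # sets qpos[:2], returns an RGB frame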
Example #27
class MujocoEnv(gym.Env):
    """Superclass for all MuJoCo environments.
    """
    def __init__(self,
                 env_name,
                 rand_seed,
                 maximum_length,
                 model_path,
                 frame_skip,
                 misc_info={}):
        self._env_name = env_name
        self._rand_seed = rand_seed
        self._maximum_length = maximum_length
        self._misc_info = misc_info

        if model_path.startswith("/"):
            fullpath = model_path
        else:
            fullpath = os.path.join(os.path.dirname(__file__), "assets",
                                    model_path)
        if not path.exists(fullpath):
            raise IOError("File %s does not exist" % fullpath)
        self.frame_skip = frame_skip
        self.model = load_model_from_path(fullpath)
        self.sim = MjSim(self.model)
        self.data = self.sim.data

        self.metadata = {
            'render.modes': ['human', 'rgb_array'],
            'video.frames_per_second': int(np.round(1.0 / self.dt))
        }
        self.mujoco_render_frames = False

        self.init_qpos = self.data.qpos.ravel().copy()
        self.init_qvel = self.data.qvel.ravel().copy()

        # Why is this here? Causes errors
        #observation, _reward, done, _info = self._step(np.zeros(self.model.nu))
        #assert not done
        #self.obs_dim = np.sum([o.size for o in observation]) if type(observation) is tuple else observation.size

        bounds = self.model.actuator_ctrlrange.copy()
        low = bounds[:, 0]
        high = bounds[:, 1]
        self.action_space = spaces.Box(low, high)

        # annoying should replace
        high = np.inf * np.ones(self.obs_dim)
        low = -high
        self.observation_space = spaces.Box(low, high)

        self._seed(self._rand_seed)

    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(self._rand_seed)
        return [seed]

    # methods to override:
    # ----------------------------

    def reset_model(self):
        """
        Reset the robot degrees of freedom (qpos and qvel).
        Implement this in each subclass.
        """
        raise NotImplementedError

    def mj_viewer_setup(self):
        """
        Due to specifics of new mujoco rendering, the standard viewer cannot be used
        with this set-up. Instead we use this mujoco specific function.
        """
        pass

    def get_env_state(self):
        """
        Get full state of the environment beyond qpos and qvel
        For example, if targets are defined using sites, this function should also
        contain location of the sites (which are not included in qpos).
        Must return a dictionary that can be used in the set_env_state function
        """
        raise NotImplementedError

    def set_env_state(self, state):
        """
        Uses the state dictionary to set the state of the world
        """
        raise NotImplementedError

    # -----------------------------

    def reset(self):
        self.sim.reset()
        self.sim.forward()
        ob = self.reset_model()
        return ob, 0, False, {}

    def reset_soft(self, seed=None):
        return self._old_obs, 0, False, {}

    def set_state(self, qpos, qvel):
        assert qpos.shape == (self.model.nq, ) and qvel.shape == (
            self.model.nv, )
        state = self.sim.get_state()
        for i in range(self.model.nq):
            state.qpos[i] = qpos[i]
        for i in range(self.model.nv):
            state.qvel[i] = qvel[i]
        self.sim.set_state(state)
        self.sim.forward()

    @property
    def dt(self):
        return self.model.opt.timestep * self.frame_skip

    def do_simulation(self, ctrl, n_frames):
        for i in range(self.model.nu):
            self.sim.data.ctrl[i] = ctrl[i]
        for _ in range(n_frames):
            self.sim.step()
            if self.mujoco_render_frames is True:
                self.mj_render()

    def mj_render(self):
        try:
            self.viewer.render()
        except:
            self.mj_viewer_setup()
            self.viewer._run_speed = 0.5
            #self.viewer._run_speed /= self.frame_skip
            self.viewer.render()

    def _get_viewer(self):
        return None

    def state_vector(self):
        state = self.sim.get_state()
        return np.concatenate([state.qpos.flat, state.qvel.flat])

    # -----------------------------

    def visualize_policy(self,
                         policy,
                         horizon=1000,
                         num_episodes=1,
                         mode='exploration'):
        self.mujoco_render_frames = True
        for ep in range(num_episodes):
            o = self.reset()
            d = False
            t = 0
            while t < horizon and d is False:
                a = policy.get_action(
                    o)[0] if mode == 'exploration' else policy.get_action(
                        o)[1]['evaluation']
                o, r, d, _ = self.step(a)
                t = t + 1
        self.mujoco_render_frames = False

    def visualize_policy_offscreen(self,
                                   policy,
                                   horizon=1000,
                                   num_episodes=1,
                                   frame_size=(640, 480),
                                   mode='exploration',
                                   save_loc='./tmp/',
                                   filename='newvid',
                                   it=0,
                                   camera_name=None):

        for ep in range(num_episodes):
            print("Episode %d: rendering offline " % ep, end='', flush=True)
            o, *_ = self.reset()
            d = False
            t = 0
            arrs = []
            t0 = timer.time()
            while t < horizon and d is False:
                a = policy(o)
                o, r, d, _ = self.step(a)
                t = t + 1
                curr_frame = self.sim.render(width=frame_size[0],
                                             height=frame_size[1],
                                             mode='offscreen',
                                             camera_name=camera_name,
                                             device_id=0)
                arrs.append(curr_frame[::-1, :, :])
                print(t, end=', ', flush=True)
            file_name = save_loc + filename + str(ep) + str(it) + ".mp4"

            if not os.path.exists(save_loc):
                os.makedirs(save_loc)

            imageio.mimwrite(file_name, np.asarray(arrs), fps=10.0)
            print("saved", file_name)
            t1 = timer.time()
            print("time taken = %f" % (t1 - t0))
class lab_env():
    def __init__(self, env, args):
        #super(lab_env, self).__init__(env)
        # The real-world simulator
        self.model = load_model_from_path('xmls/lab_env.xml')
        self.sim = MjSim(self.model)
        # Used to locate the gripper
        self.model2 = load_model_from_path('xmls/light_env.xml')
        self.sim2 = MjSim(self.model2)

    def reset(self, task_id):
        self.task = task_id
        self.grasping = -1
        self.last_grasp = -1
        # Configure gravity
        for i in range(4):
            self.sim.data.ctrl[i] = -1
        # Configure joint positions
        for i in range(42):
            self.sim.data.qpos[i] = initial_pos[i]
        for i in range(3):
            self.sim.data.qpos[i] = joint_pos[task_id][i]
        self.pos_forward()
        self.sim.forward()

        remapped_pos = [
            remap(self.sim.data.qpos[0], -30 / 180 * math.pi,
                  45 / 180 * math.pi, -1, 1),
            remap(self.sim.data.qpos[1], -105 / 180 * math.pi,
                  -50 / 180 * math.pi, -1, 1),
            remap(self.sim.data.qpos[2], 0 / 180 * math.pi,
                  180 / 180 * math.pi, -1, 1), 0
        ]

        return (remapped_pos, ) + self.get_state()

    def step(self, action):
        self.sim.data.qpos[0] = remap(action[0], -1, 1, -30 / 180 * math.pi,
                                      45 / 180 * math.pi)
        self.sim.data.qpos[1] = remap(action[1], -1, 1, -105 / 180 * math.pi,
                                      -50 / 180 * math.pi)
        self.sim.data.qpos[2] = remap(action[2], -1, 1, 0 / 180 * math.pi,
                                      180 / 180 * math.pi)

        self.pos_forward()
        self.sim.forward()

        if action[3] < self.last_grasp or self.grasping == -1:
            t = int(remap(action[3], -1, 1, 0, grasp_steps))
            for i in range(6, 14):
                self.sim.data.qpos[i] = 0
            self.sim.forward()
            self.grasping = -1
            self.sim.data.ctrl[4] = 1
            self.sim.data.ctrl[5] = 1
            backup = [
                self.sim.data.qpos[i]
                for i in [15, 16, 22, 23, 29, 30, 36, 37]
            ]

            for i in range(t):
                self.sim.step()
                stop = False
                for j in range(4):
                    if self.sim.data.sensordata[j] > sensor_threshold:
                        self.grasping = j
                        self.pickuppos = [
                            self.sim.data.qpos[i]
                            for i in (list(range(6)) + list(
                                range(14 + 7 * self.grasping, 21 +
                                      7 * self.grasping)))
                        ]
                        stop = True
                        break
                for i in range(4):
                    for j in range(2):
                        self.sim.data.qpos[15 + 7 * i + j] = backup[i * 2 + j]
                if stop:
                    break
            self.gripper_sync()
            self.sim.forward()

            self.sim.data.ctrl[4] = 0
            self.sim.data.ctrl[5] = 0

        self.last_grasp = action[3]

        return self.get_state()

    # Ensure that the gripper always points down and stays parallel to the desk edge
    def pos_forward(self):
        self.sim.data.qpos[
            3] = math.pi * 1.5 - self.sim.data.qpos[1] - self.sim.data.qpos[2]
        self.sim.data.qpos[4] = math.pi * 1.5
        self.sim.data.qpos[5] = math.pi * 1.25 + self.sim.data.qpos[0]
        self.gripper_sync()

        if self.grasping != -1:
            current_xyz = pos_to_xyz(self.sim.data.qpos[0:6])
            old_xyz = pos_to_xyz(self.pickuppos[0:6])
            for i in range(3):
                self.sim.data.qpos[
                    14 + 7 * self.grasping +
                    i] = self.pickuppos[6 + i] + current_xyz[i] - old_xyz[i]
            '''
            old_quat = quaternion(self.pickuppos[9], vector(self.pickuppos[10], self.pickuppos[11], self.pickuppos[12]))
            rotate_quat = quaternion(math.cos(self.sim.data.qpos[0] - self.pickuppos[0]), vector(0, 0, math.sin(self.sim.data.qpos[0] - self.pickuppos[0])))
            new_quat = rotate_quat.mul(old_quat)
            self.sim.data.qpos[17 + 7 * self.grasping] = new_quat.w
            self.sim.data.qpos[18 + 7 * self.grasping] = new_quat.v.x
            self.sim.data.qpos[19 + 7 * self.grasping] = new_quat.v.y
            self.sim.data.qpos[20 + 7 * self.grasping] = new_quat.v.z
            '''

    def gripper_sync(self):
        self.sim.data.qpos[9] = gripper_consistent(self.sim.data.qpos[6:9])
        self.sim.data.qpos[13] = gripper_consistent(self.sim.data.qpos[10:13])

    def get_state(self):
        sync(self.sim, self.sim2, 6)
        # Locate the gripper, render twice to overcome bugs in mujoco
        image_1 = copy.deepcopy(
            self.sim.render(width=960,
                            height=720,
                            camera_name='workbench_camera'))
        image_1 = copy.deepcopy(
            self.sim.render(width=960,
                            height=720,
                            camera_name='workbench_camera'))
        image_2 = copy.deepcopy(
            self.sim.render(width=960, height=720, camera_name='upper_camera'))
        image_2 = copy.deepcopy(
            self.sim.render(width=960, height=720, camera_name='upper_camera'))
        image_3 = copy.deepcopy(
            self.sim2.render(width=960,
                             height=720,
                             camera_name='workbench_camera'))
        image_3 = copy.deepcopy(
            self.sim2.render(width=960,
                             height=720,
                             camera_name='workbench_camera'))
        x1, y1 = get_x_y(image_3)
        image_4 = copy.deepcopy(
            self.sim2.render(width=960, height=720,
                             camera_name='upper_camera'))
        image_4 = copy.deepcopy(
            self.sim2.render(width=960, height=720,
                             camera_name='upper_camera'))
        x2, y2 = get_x_y(image_4)
        # Crop gripper images and add noise
        image_1 = cv2.GaussianBlur(
            gaussian_noise(
                crop(image_1, x1 + fig_size_1[0] // 2, y1, *fig_size_1),
                *gaussian_noise_parameters),
            *gaussian_blur_prarmeters).transpose((2, 0, 1))
        image_2 = cv2.GaussianBlur(
            gaussian_noise(
                crop(image_2, x2 + fig_size_2[0] // 2, y2, *fig_size_2),
                *gaussian_noise_parameters),
            *gaussian_blur_prarmeters).transpose((2, 0, 1))

        danger = int(self.safety_check() or math.isnan(x1) or math.isnan(y1)
                     or math.isnan(x2) or math.isnan(y2))
        return [x1, y1, x2, y2,
                int(self.grasping == self.task), danger], (image_1, image_2)

    def safety_check(self):
        # return 0 if safe, otherwise 1
        backup = [self.sim.data.qpos[i] for i in range(14)]
        self.sim.step()
        s = 0
        for i in range(6):
            s += abs(backup[i] - self.sim.data.qpos[i])
            self.sim.data.qpos[i] = backup[i]
        return s > safety_threshold

    def force_close(self):
        for i in range(2):
            for j in range(3):
                self.sim.data.qpos[6 + i * 4 + j] = closed_pos[j]
        self.gripper_sync()
        self.sim.forward()
Example #29
class BaseMujocoEnv(BaseEnv):
    def __init__(self, model_path, _hp):
        self._frame_height = _hp.viewer_image_height
        self._frame_width = _hp.viewer_image_width

        self._reset_sim(model_path)

        self._base_adim, self._base_sdim = None, None  #state/action dimension of Mujoco control
        self._adim, self._sdim = None, None  #state/action dimension presented to agent
        self.num_objects, self._n_joints = None, None
        self._goal_obj_pose = None
        self._goaldistances = []

        self._ncam = _hp.ncam
        if self._ncam == 2:
            self.cameras = ['maincam', 'leftcam']
        elif self._ncam == 1:
            self.cameras = ['maincam']
        else:
            raise ValueError

        self._last_obs = None
        self._hp = _hp

    def _default_hparams(self):
        parent_params = super()._default_hparams()
        parent_params.add_hparam('viewer_image_height', 480)
        parent_params.add_hparam('viewer_image_width', 640)
        parent_params.add_hparam('ncam', 1)

        return parent_params

    def set_goal_obj_pose(self, pose):
        self._goal_obj_pose = pose

    def _reset_sim(self, model_path):
        """
        Creates a MjSim from passed in model_path
        :param model_path: Absolute path to model file
        :return: None
        """
        self._model_path = model_path
        self.sim = MjSim(load_model_from_path(self._model_path))

    def reset(self):
        self._goaldistances = []

    def render(self):
        """ Renders the environment.

        Renders every camera configured through the ncam hyperparameter
        ('maincam' only, or 'maincam' and 'leftcam').
        :return: uint8 numpy array of shape (ncam, height, width, 3) with renderings from sim
        """
        images = np.zeros(
            (self._ncam, self._frame_height, self._frame_width, 3),
            dtype=np.uint8)
        for i, cam in enumerate(self.cameras):
            images[i] = self.sim.render(self._frame_width,
                                        self._frame_height,
                                        camera_name=cam)
        return images

    def project_point(self, point, camera):
        model_matrix = np.zeros((4, 4))
        model_matrix[:3, :3] = self.sim.data.get_camera_xmat(camera).T
        model_matrix[-1, -1] = 1

        fovy_radians = np.deg2rad(
            self.sim.model.cam_fovy[self.sim.model.camera_name2id(camera)])
        uh = 1. / np.tan(fovy_radians / 2)
        uw = uh / (self._frame_width / self._frame_height)
        extent = self.sim.model.stat.extent
        far, near = self.sim.model.vis.map.zfar * extent, self.sim.model.vis.map.znear * extent
        view_matrix = np.array([
            [uw, 0., 0., 0.],  # matrix definition from
            [
                0., uh, 0., 0.
            ],  # https://stackoverflow.com/questions/18404890/how-to-build-perspective-projection-matrix-no-api
            [0., 0., far / (far - near), -1.],
            [0., 0., -2 * far * near / (far - near), 0.]
        ])  # Note Mujoco doubles this quantity

        MVP_matrix = view_matrix.dot(model_matrix)
        world_coord = np.ones((4, 1))
        world_coord[:3, 0] = point - self.sim.data.get_camera_xpos(camera)

        clip = MVP_matrix.dot(world_coord)
        ndc = clip[:3] / clip[3]  # everything should now be in -1 to 1!!
        col, row = (ndc[0] + 1) * self._frame_width / 2, (
            -ndc[1] + 1) * self._frame_height / 2

        return self._frame_height - row, col  # rendering flipped around in height

    def get_desig_pix(self, target_width, round=True, obj_poses=None):
        qpos_dim = self._n_joints  # the state contains both pos and vel
        assert self.sim.data.qpos.shape[0] == qpos_dim + 7 * self.num_objects
        desig_pix = np.zeros([self._ncam, self.num_objects, 2], dtype=int)
        ratio = self._frame_width / target_width
        for icam, cam in enumerate(self.cameras):
            for i in range(self.num_objects):
                if obj_poses is None:
                    fullpose = self.sim.data.qpos[i * 7 +
                                                  qpos_dim:(i + 1) * 7 +
                                                  qpos_dim].squeeze()
                    chosen_point = fullpose[:3]
                else:
                    chosen_point = obj_poses[i, :3]
                d = self.project_point(chosen_point, cam)
                d = np.stack(d) / ratio
                if round:
                    d = np.around(d).astype(int)
                desig_pix[icam, i] = d.squeeze()
        return desig_pix

    def get_goal_pix(self, target_width, round=True):
        goal_pix = np.zeros([self._ncam, self.num_objects, 2], dtype=int)
        ratio = self._frame_width / target_width
        for icam, cam in enumerate(self.cameras):
            for i in range(self.num_objects):
                g = self.project_point(self._goal_obj_pose[i, :3], cam)
                g = np.stack(g) / ratio
                if round:
                    g = np.around(g).astype(int)
                goal_pix[icam, i] = g.squeeze()
        return goal_pix

    def eval(self, target_width=None, save_dir=None, ntasks=None):
        self._goaldistances.append(self.get_distance_score())
        stats = {}
        stats['improvement'] = self._goaldistances[0] - self._goaldistances[-1]
        stats['initial_dist'] = self._goaldistances[0]
        stats['final_dist'] = self._goaldistances[-1]
        return stats

    def get_distance_score(self):
        """
        :return:  mean of the distances between all objects and goals
        """
        abs_distances = []
        for i_ob in range(self.num_objects):
            goal_pos = self._goal_obj_pose[i_ob, :3]
            curr_pos = self.sim.data.qpos[self._n_joints +
                                          i_ob * 7:self._n_joints + 3 +
                                          i_ob * 7]
            abs_distances.append(np.linalg.norm(goal_pos - curr_pos))
        return np.mean(np.array(abs_distances))

    def snapshot_noarm(self):
        raise NotImplementedError

    @property
    def adim(self):
        return self._adim

    @property
    def sdim(self):
        return self._sdim

    @property
    def ncam(self):
        return self._ncam
Example #30
class MazeNavigation(Env, utils.EzPickle):
    def __init__(self):
        utils.EzPickle.__init__(self)
        self.hist = self.cost = self.done = self.time = self.state = None

        dirname = os.path.dirname(__file__)
        filename = os.path.join(dirname, 'assets/simple_maze.xml')
        self.sim = MjSim(load_model_from_path(filename))
        self.horizon = HORIZON
        self._max_episode_steps = self.horizon
        self.transition_function = get_offline_data
        self.steps = 0
        self.images = not GT_STATE
        self.action_space = Box(-MAX_FORCE * np.ones(2),
                                MAX_FORCE * np.ones(2))
        self.transition_function = get_offline_data
        obs = self._get_obs()
        self.dense_reward = DENSE_REWARD

        if self.images:
            self.observation_space = obs.shape
        else:
            self.observation_space = Box(-0.3, 0.3, shape=obs.shape)

        self.gain = 1.05
        self.goal = np.zeros((2, ))
        self.goal[0] = 0.25
        self.goal[1] = 0

    def step(self, action):
        action = process_action(action)
        self.sim.data.qvel[:] = 0
        self.sim.data.ctrl[:] = action
        cur_obs = self._get_obs()
        constraint = int(self.sim.data.ncon > 3)
        if not constraint:
            for _ in range(500):
                self.sim.step()
        obs = self._get_obs()
        self.sim.data.qvel[:] = 0
        self.steps += 1
        constraint = int(self.sim.data.ncon > 3)
        self.done = self.steps >= self.horizon or constraint or (
            self.get_distance_score() < GOAL_THRESH)
        if not self.dense_reward:
            reward = -(self.get_distance_score() > GOAL_THRESH).astype(float)
        else:
            reward = -self.get_distance_score()

        info = {
            "constraint": constraint,
            "reward": reward,
            "state": cur_obs,
            "next_state": obs,
            "action": action,
            "success": reward > -0.03
        }

        return obs, reward, self.done, info

    def _get_obs(self, images=False):
        if images:
            return self.sim.render(64, 64, camera_name="cam0")
        # Joint positions and velocities
        state = np.concatenate(
            [self.sim.data.qpos[:].copy(), self.sim.data.qvel[:].copy()])

        if not self.images and not images:
            return state[:2]  # State is just (x, y) position

        # Get images
        ims = self.sim.render(64, 64, camera_name="cam0")
        return ims / 255

    def reset(self, difficulty='h', check_constraint=True, pos=()):
        if len(pos):
            self.sim.data.qpos[0] = pos[0]
            self.sim.data.qpos[1] = pos[1]
        else:
            if difficulty is None:
                self.sim.data.qpos[0] = np.random.uniform(-0.27, 0.27)
            elif difficulty == 'e':
                self.sim.data.qpos[0] = np.random.uniform(0.14, 0.22)
            elif difficulty == 'm':
                self.sim.data.qpos[0] = np.random.uniform(-0.04, 0.04)
            elif difficulty == 'h':
                self.sim.data.qpos[0] = np.random.uniform(-0.22, -0.13)
            self.sim.data.qpos[1] = np.random.uniform(-0.22, 0.22)

        self.steps = 0

        w1 = -0.08
        w2 = 0.08
        self.sim.model.geom_pos[5, 1] = 0.5 + w1
        self.sim.model.geom_pos[7, 1] = -0.25 + w1
        self.sim.model.geom_pos[6, 1] = 0.4 + w2
        self.sim.model.geom_pos[8, 1] = -0.25 + w2
        self.sim.forward()
        constraint = int(self.sim.data.ncon > 3)
        if constraint and check_constraint:
            if not len(pos):
                self.reset(difficulty)

        return self._get_obs()

    def get_distance_score(self):
        """
        :return: RMS distance between the agent position and the goal
        """
        d = np.sqrt(np.mean((self.goal - self.sim.data.qpos[:])**2))
        return d

    def expert_action(self):
        st = self.sim.data.qpos[:]
        if st[0] <= -0.151:
            delt = (np.array([-0.15, -0.125]) - st)
        elif st[0] <= 0.149:
            delt = (np.array([0.15, 0.125]) - st)
        else:
            delt = (np.array([self.goal[0], self.goal[1]]) - st)
        act = self.gain * delt

        return act
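A short usage sketch for MazeNavigation (assuming the module-level helpers and constants it references, such as process_action, HORIZON, MAX_FORCE and GT_STATE, are defined in the surrounding file):

env = MazeNavigation()
obs = env.reset(difficulty='e')  # easy start region
obs, reward, done, info = env.step(env.expert_action())
print(info["constraint"], info["success"])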
Example #31
class MujocoEnv(gym.Env):
    """Superclass for all MuJoCo environments.
    """
    def __init__(self, model_path, frame_skip=1, xml_string=""):
        """
        @param model_path path of the default model
        @param xml_string if given, the model will be reset using these values
        """

        fullpath = os.path.join(os.path.dirname(__file__), "assets",
                                model_path)
        if not path.exists(fullpath):
            raise IOError("File %s does not exist" % fullpath)
        self.model = load_model_from_path(fullpath)
        with open(fullpath, 'r') as f:
            self.model_xml = f.read()
            self.default_xml = self.model_xml

        if xml_string != "":
            self.model = load_model_from_xml(xml_string)
            self.model_xml = xml_string

        self.frame_skip = frame_skip

        self.sim = MjSim(self.model)
        self.data = self.sim.data

        self.viewer = None
        self._viewers = {}

        self.metadata = {
            'render.modes': ['human', 'rgb_array'],
            'video.frames_per_second': int(np.round(1.0 / self.dt))
        }
        self.mujoco_render_frames = False

        self.init_qpos = self.data.qpos.ravel().copy()
        self.init_qvel = self.data.qvel.ravel().copy()
        observation, _reward, done, _info = self.step(np.zeros(self.model.nu))
        assert not done
        self.obs_dim = np.sum([
            o.size for o in observation
        ]) if type(observation) is tuple else observation.size

        high = np.inf * np.ones(self.obs_dim)
        low = -high
        self.observation_space = spaces.Box(low, high)

        self._seed()
        self.set_param_space()

    def get_params(self):
        """
        Returns a dict of (param_name, param_value)
        """
        return MujocoUpdater(self.model_xml).get_params()

    def set_params(self, params):
        """
        @param params: dict(param_name, param_value)
        param_name should be a string of bodyname__type__paramname
        where type is either geom or joint,
        e.g. thigh__joint__friction,
        and param_value is a numpy array
        """
        # invalidate cached properties
        self.__dict__.pop('action_space', None)
        self.__dict__.pop('observation_space', None)

        new_xml = MujocoUpdater.set_params(self.model_xml, params)
        self.__init__(xml_string=new_xml)
        self.reset()
        return self

    def set_param_space(self, param_space=None, eps_scale=0.5, replace=True):
        """
        Set param_space
        @param param_space: dict(string, gym.space.base.Space)
        @param eps_scale: scale of variation applied to all params
        @param replace: if true, param_space overwrites default param_space.
                        Default behavior is to merge.
        """
        if param_space is not None:
            if replace:
                self._param_space = param_space
            else:
                self._param_space = {**self._param_space, **param_space}
        else:
            params = MujocoUpdater(self.model_xml).get_params()
            self._param_space = dict()
            for param, value in params.items():
                eps = np.abs(value) * eps_scale
                ub = value + eps
                lb = value - eps
                for name in positive_params:
                    if name in param:
                        lb = np.clip(lb, 1e-3, ub)
                        break
                space = spaces.Box(lb, ub)
                self._param_space[param] = space

    def get_geom_params(self, body_name):
        geom = MujocoUpdater(self.model_xml).get_geom(body_name)
        return {
            k: v
            for k, v in geom.attrib.items()
            if k not in MujocoUpdater.ignore_params
        }

    def get_joint_params(self, body_name):
        joint = MujocoUpdater(self.model_xml).get_joint(body_name)
        return {
            k: v
            for k, v in joint.attrib.items()
            if k not in MujocoUpdater.ignore_params
        }

    def get_body_names(self):
        return MujocoUpdater(self.model_xml).get_body_names()

    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def get_current_obs(self):
        return self._get_full_obs()

    def _get_full_obs(self):
        data = self.sim.data
        cdists = np.copy(self.model.geom_margin).flat
        for c in self.sim.data.contact:
            cdists[c.geom2] = min(cdists[c.geom2], c.dist)
        obs = np.concatenate([
            data.qpos.flat,
            data.qvel.flat,
            # data.cdof.flat,
            data.cinert.flat,
            data.cvel.flat,
            # data.cacc.flat,
            data.qfrc_actuator.flat,
            data.cfrc_ext.flat,
            data.qfrc_constraint.flat,
            cdists,
            # data.qfrc_bias.flat,
            # data.qfrc_passive.flat,
            self.dcom.flat,
        ])
        return obs

    @property
    def _state(self):
        return np.concatenate(
            [self.sim.data.qpos.flat, self.sim.data.qvel.flat])

    @property
    def _full_state(self):
        return np.concatenate([
            self.sim.data.qpos,
            self.sim.data.qvel,
            self.sim.data.qacc,
            self.sim.data.ctrl,
        ]).ravel()

    # methods to override:
    # ----------------------------

    def reset_model(self):
        """
        Reset the robot degrees of freedom (qpos and qvel).
        Implement this in each subclass.
        """
        raise NotImplementedError

    def mj_viewer_setup(self):
        """
        Due to specifics of new mujoco rendering, the standard viewer cannot be used
        with this set-up. Instead we use this mujoco specific function.
        """
        pass

    def viewer_setup(self):
        """
        Does not work. Use mj_viewer_setup() instead
        """
        pass

    # -----------------------------

    def reset(self, randomize=True):
        self.sim.reset()
        self.sim.forward()
        ob = self.reset_model()
        return ob

    # Added for bayesian_rl
    def get_sim_state(self):
        return self.sim.get_state()

    # Added for bayesian_rl
    def set_sim_state(self, state):
        self.sim.set_state(state)

    # Added for bayesian_rl
    def set_state_vector(self, state_vector):
        qpos = state_vector[:self.model.nq]
        qvel = state_vector[self.model.nq:]
        self.set_state(qpos, qvel)

    # Added for bayesian_rl
    def get_state_vector(self):
        return self.state_vector()

    def set_state(self, qpos, qvel):
        assert qpos.shape == (self.model.nq, ) and qvel.shape == (
            self.model.nv, )
        state = self.sim.get_state()
        for i in range(self.model.nq):
            state.qpos[i] = qpos[i]
        for i in range(self.model.nv):
            state.qvel[i] = qvel[i]
        self.sim.set_state(state)
        self.sim.forward()

    @property
    def dt(self):
        return self.model.opt.timestep * self.frame_skip

    def do_simulation(self, ctrl, n_frames):
        for i in range(self.model.nu):
            self.sim.data.ctrl[i] = ctrl[i]
        for _ in range(n_frames):
            self.sim.step()
            if self.mujoco_render_frames is True:
                self.mj_render()

    def mj_render(self):
        try:
            self.viewer.render()
        except:
            self.mj_viewer_setup()
            self.viewer._run_speed = 1.0
            #self.viewer._run_speed /= self.frame_skip
            self.viewer.render()

    def _get_viewer(self, mode):
        self.viewer = self._viewers.get(mode)
        if self.viewer is None:
            if mode == 'human':
                self.viewer = mujoco_py.MjViewer(self.sim)
            elif mode == 'rgb_array' or mode == 'depth_array':
                self.viewer = mujoco_py.MjRenderContextOffscreen(self.sim, -1)

            self.viewer_setup()
            self._viewers[mode] = self.viewer
        return self.viewer

    def close(self):
        if self.viewer is not None:
            # self.viewer.finish()
            self.viewer = None
            self._viewers = {}

    # def step(self, a):
    #     return self._step(a)

    # Added for bayesian_rl
    def take_action(self, a):
        self.step(a)
        return self.get_sim_state()

    def state_vector(self):
        state = self.sim.get_state()
        return np.concatenate([state.qpos.flat, state.qvel.flat])

    # -----------------------------

    def visualize_policy(self,
                         policy,
                         horizon=1000,
                         num_episodes=1,
                         mode='exploration'):
        self.mujoco_render_frames = True
        for ep in range(num_episodes):
            o = self.reset()
            d = False
            t = 0
            while t < horizon and d is False:
                a = policy.get_action(
                    o)[0] if mode == 'exploration' else policy.get_action(
                        o)[1]['evaluation']
                o, r, d, _ = self.step(a)
                t = t + 1
        self.mujoco_render_frames = False

    def visualize_policy_offscreen(self,
                                   policy,
                                   horizon=1000,
                                   num_episodes=1,
                                   mode='exploration',
                                   save_loc='/tmp/',
                                   filename='newvid',
                                   camera_name=None):
        import skvideo.io
        for ep in range(num_episodes):
            print("Episode %d: rendering offline " % ep, end='', flush=True)
            o = self.reset()
            d = False
            t = 0
            arrs = []
            t0 = timer.time()
            while t < horizon and d is False:
                a = policy.get_action(
                    o)[0] if mode == 'exploration' else policy.get_action(
                        o)[1]['mean']
                o, r, d, _ = self.step(a)
                t = t + 1
                curr_frame = self.sim.render(width=640,
                                             height=480,
                                             mode='offscreen',
                                             camera_name=camera_name,
                                             device_id=0)
                arrs.append(curr_frame[::-1, :, :])
                print(t, end=', ', flush=True)
            file_name = save_loc + filename + '_' + str(ep) + ".mp4"
            skvideo.io.vwrite(file_name, np.asarray(arrs))
            print("saved", file_name)
            t1 = timer.time()
            print("time taken = %f" % (t1 - t0))

    def render(self, mode='human', width=DEFAULT_SIZE, height=DEFAULT_SIZE):
        if mode == 'rgb_array':
            self._get_viewer(mode).render(width, height)
            # window size used for old mujoco-py:
            data = self._get_viewer(mode).read_pixels(width,
                                                      height,
                                                      depth=False)
            # original image is upside-down, so flip it
            return data[::-1, :, :]
        elif mode == 'depth_array':
            self._get_viewer(mode).render(width, height)
            # window size used for old mujoco-py:
            # Extract depth part of the read_pixels() tuple
            data = self._get_viewer(mode).read_pixels(width,
                                                      height,
                                                      depth=True)[1]
            # original image is upside-down, so flip it
            return data[::-1, :]
        elif mode == 'human':
            self._get_viewer(mode).render()
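# Added sketch (not part of the scraped example above): both offscreen render modes
# return the frame flipped with [::-1] because the pixels come back from the buffer
# bottom-up. A minimal, self-contained numpy illustration of that row flip, using a
# dummy 2x2 "image" instead of a real MuJoCo render:
import numpy as np

frame = np.arange(12, dtype=np.uint8).reshape(2, 2, 3)  # pretend bottom-up RGB render
flipped = frame[::-1, :, :]                              # reverse the row order
assert np.array_equal(flipped[0], frame[1])              # bottom row is now the top row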
Example #32
0
class MujocoEnv(gym.Env):
    FILE = None

    @autoargs.arg('action_noise',
                  type=float,
                  help='Noise added to the controls, which will be '
                  'proportional to the action bounds')
    def __init__(self, action_noise=0.0, file_path=None, template_args=None):
        # compile template
        if file_path is None:
            if self.__class__.FILE is None:
                raise NotImplementedError("Mujoco file not specified")
            file_path = osp.join(MODEL_DIR, self.__class__.FILE)
        if file_path.endswith(".mako"):
            lookup = mako.lookup.TemplateLookup(directories=[MODEL_DIR])
            with open(file_path) as template_file:
                template = mako.template.Template(template_file.read(),
                                                  lookup=lookup)
            content = template.render(
                opts=template_args if template_args is not None else {}, )
            tmp_f, file_path = tempfile.mkstemp(suffix=".xml", text=True)
            with open(file_path, 'w') as f:
                f.write(content)
            self.model = load_model_from_path(file_path)
            os.close(tmp_f)
        else:
            self.model = load_model_from_path(file_path)
        self.sim = MjSim(self.model)
        self.data = self.sim.data
        self.viewer = None
        self.render_width = 512
        self.render_height = 512
        self.render_camera = None
        # copy the initial state so later simulation steps do not overwrite it
        self.init_qpos = self.sim.data.qpos.copy()
        self.init_qvel = self.sim.data.qvel.copy()
        self.init_qacc = self.sim.data.qacc.copy()
        self.init_ctrl = self.sim.data.ctrl.copy()
        self.qpos_dim = self.init_qpos.size
        self.qvel_dim = self.init_qvel.size
        self.ctrl_dim = self.init_ctrl.size
        self.action_noise = action_noise
        self.frame_skip = 1
        self.dcom = None
        self.current_com = None
        self.reset()
        super().__init__()

    @cached_property
    @overrides
    def action_space(self):
        bounds = self.model.actuator_ctrlrange
        lb = bounds[:, 0]
        ub = bounds[:, 1]
        return gym.spaces.Box(lb, ub, dtype=np.float32)

    @cached_property
    @overrides
    def observation_space(self):
        shp = self.get_current_obs().shape
        ub = BIG * np.ones(shp)
        return gym.spaces.Box(ub * -1, ub, dtype=np.float32)

    @property
    def action_bounds(self):
        assert isinstance(self.action_space, gym.spaces.Box)
        return self.action_space.low, self.action_space.high

    def reset_mujoco(self, init_state=None):
        self.sim.reset()
        if init_state is None:
            self.sim.data.qpos[:] = self.init_qpos + np.random.normal(
                size=self.init_qpos.shape) * 0.01
            self.sim.data.qvel[:] = self.init_qvel + np.random.normal(
                size=self.init_qvel.shape) * 0.1
            self.sim.data.qacc[:] = self.init_qacc
            self.sim.data.ctrl[:] = self.init_ctrl
        else:
            start = 0
            for datum_name in ["qpos", "qvel", "qacc", "ctrl"]:
                datum = getattr(self.sim.data, datum_name)
                datum_dim = datum.shape[0]
                # write in place: mujoco-py exposes qpos/qvel/... as array views,
                # not as settable attributes
                datum[:] = init_state[start:start + datum_dim]
                start += datum_dim

    @overrides
    def reset(self, init_state=None):
        self.reset_mujoco(init_state)
        self.sim.forward()
        self.current_com = self.sim.data.subtree_com[0]
        self.dcom = np.zeros_like(self.current_com)
        return self.get_current_obs()

    def get_current_obs(self):
        return self._get_full_obs()

    def _get_full_obs(self):
        data = self.sim.data
        cdists = np.copy(self.sim.model.geom_margin).flat
        for c in self.sim.data.contact:
            cdists[c.geom2] = min(cdists[c.geom2], c.dist)
        obs = np.concatenate([
            data.qpos.flat,
            data.qvel.flat,
            data.cinert.flat,
            data.cvel.flat,
            data.qfrc_actuator.flat,
            data.cfrc_ext.flat,
            data.qfrc_constraint.flat,
            cdists,
            self.dcom.flat,
        ])
        return obs

    @property
    def _state(self):
        return np.concatenate(
            [self.sim.data.qpos.flat, self.sim.data.qvel.flat])

    @property
    def _full_state(self):
        return np.concatenate([
            self.sim.data.qpos,
            self.sim.data.qvel,
            self.sim.data.qacc,
            self.sim.data.ctrl,
        ]).ravel()

    def inject_action_noise(self, action):
        # generate action noise
        noise = self.action_noise * np.random.normal(size=action.shape)
        # rescale the noise to make it proportional to the action bounds
        lb, ub = self.action_bounds
        noise = 0.5 * (ub - lb) * noise
        return action + noise

    def forward_dynamics(self, action):
        self.sim.data.ctrl[:] = self.inject_action_noise(action)
        for _ in range(self.frame_skip):
            self.sim.step()
        self.sim.forward()
        new_com = self.sim.data.subtree_com[0]
        self.dcom = new_com - self.current_com
        self.current_com = new_com

    def get_viewer(self):
        if self.viewer is None:
            self.viewer = MjViewer(self.sim)
        return self.viewer

    def render(self, close=False, mode='human'):
        # honor close before drawing so render(close=True) works regardless of mode
        if close:
            self.stop_viewer()
            return None
        if mode == 'human':
            viewer = self.get_viewer()
            viewer.render()
            return None
        elif mode == 'rgb_array':
            img = self.sim.render(self.render_width,
                                  self.render_height,
                                  camera_name=self.render_camera)
            img = img[::-1, :, :]
            return img
        return None

    def start_viewer(self):
        viewer = self.get_viewer()
        if not viewer.running:
            viewer.start()

    def stop_viewer(self):
        if self.viewer:
            self.viewer.finish()

    def release(self):
        # temporarily alleviate the issue (but still some leak)
        functions.mj_deleteModel(self.sim._wrapped)
        functions.mj_deleteData(self.data._wrapped)

    def get_body_xmat(self, body_name):
        return self.data.get_body_xmat(body_name).reshape((3, 3))

    def get_body_com(self, body_name):
        return self.data.get_body_xpos(body_name)

    def get_body_comvel(self, body_name):
        return self.data.get_body_xvelp(body_name)

    def print_stats(self):
        super(MujocoEnv, self).print_stats()
        print("qpos dim:\t%d" % len(self.sim.data.qpos))

    def action_from_key(self, key):
        raise NotImplementedError
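# Added sketch (not part of the scraped example above): the __init__ above optionally
# renders a .mako template into a temporary MuJoCo XML file and then loads it with
# load_model_from_path. A stand-alone version of that flow, assuming mako and
# mujoco_py are installed; the template string here is a made-up minimal model:
import os
import tempfile
import mako.template
from mujoco_py import load_model_from_path

XML_TEMPLATE = """
<mujoco model="demo">
  <worldbody>
    <geom type="sphere" size="${radius}"/>
  </worldbody>
</mujoco>
"""

content = mako.template.Template(XML_TEMPLATE).render(radius=0.1)
fd, xml_path = tempfile.mkstemp(suffix=".xml", text=True)
with open(xml_path, "w") as f:
    f.write(content)          # write the rendered XML, then load it like the example does
os.close(fd)
model = load_model_from_path(xml_path)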
Example #33
0
class BlockPickAndPlaceEnv():
    def __init__(self,
                 num_objects,
                 num_colors,
                 img_dim,
                 include_z,
                 random_initialize=False,
                 view=False):
        # self.asset_path = os.path.join(os.getcwd(), '../mujoco_data/stl/')
        self.asset_path = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), '../mujoco_data/stl/')
        # self.asset_path = os.path.join(os.path.realpath(__file__), 'mujoco_data/stl/')
        # self.asset_path = '../mujoco_data/stl/'
        self.img_dim = img_dim
        self.include_z = include_z
        self.polygons = ['cube', 'horizontal_rectangle', 'tetrahedron'][:1]
        self.num_colors = num_colors
        self.num_objects = num_objects
        self.view = view

        #Hyper-parameters
        self.internal_steps_per_step = 2000
        self.drop_height = 3
        self.pick_height = 0.59
        self.bounds = {
            'x_min': -2.5,
            'x_max': 2.5,
            'y_min': 1.0,
            'y_max': 4.0,
            'z_min': 0.05,
            'z_max': 2.2
        }
        self.TOLERANCE = 0.2
        self.wall_pos = np.array([2, 0, 0])

        self.names = []
        self.blocks = []
        self._blank_observation = None

        if random_initialize:
            self.reset()

    ####Env initialization functions
    def get_unique_name(self, polygon):
        i = 0
        while '{}_{}'.format(polygon, i) in self.names:
            i += 1
        name = '{}_{}'.format(polygon, i)
        self.names.append(name)
        return name

    def add_mesh(self, polygon, pos, quat, rgba):
        name = self.get_unique_name(polygon)
        self.blocks.append({
            'name': name,
            'polygon': polygon,
            'pos': np.array(pos),
            'quat': np.array(quat),
            'rgba': rgba,
            'material': name
        })

    def get_asset_material_str(self):
        asset_base = '<material name="{}" rgba="{}" specular="0" shininess="0" emission="0.25"/>'
        asset_list = [
            asset_base.format(a['name'], self.convert_to_str(a['rgba']))
            for a in self.blocks
        ]
        asset_str = '\n'.join(asset_list)
        return asset_str

    def get_asset_mesh_str(self):
        asset_base = '<mesh name="{}" scale="0.6 0.6 0.6" file="{}"/>'
        asset_list = [
            asset_base.format(
                a['name'], os.path.join(self.asset_path,
                                        a['polygon'] + '.stl'))
            for a in self.blocks
        ]
        asset_str = '\n'.join(asset_list)
        return asset_str

    def get_body_str(self):
        body_base = '''
          <body name='{}' pos='{}' quat='{}'>
            <joint type='free' name='{}'/>
            <geom name='{}' type='mesh' mesh='{}' pos='0 0 0' quat='1 0 0 0' material='{}' 
            condim='3' friction='1 1 1' solimp="0.998 0.998 0.001" solref="0.02 1"/>
          </body>
        '''
        body_list = [
            body_base.format(m['name'], self.convert_to_str(m['pos']),
                             self.convert_to_str(m['quat']), m['name'],
                             m['name'], m['name'], m['material'])
            for i, m in enumerate(self.blocks)
        ]
        body_str = '\n'.join(body_list)
        return body_str

    def convert_to_str(self, an_iterable):
        tmp = ""
        for an_item in an_iterable:
            tmp += str(an_item) + " "
        # tmp = " ".join(str(an_iterable))
        return tmp[:-1]

    def get_random_pos(self, height=None):
        x = np.random.uniform(self.bounds['x_min'], self.bounds['x_max'])
        y = np.random.uniform(self.bounds['y_min'], self.bounds['y_max'])
        if height is None:
            z = np.random.uniform(self.bounds['z_min'], self.bounds['z_max'])
        else:
            z = height
        return np.array([x, y, z])

    def get_random_rbga(self, num_colors):
        rgb = list(pickRandomColor(num_colors))
        return rgb + [1]

    def initialize(self, use_cur_pos):
        tmp = MODEL_XML_BASE.format(self.get_asset_mesh_str(),
                                    self.get_asset_material_str(),
                                    self.get_body_str())
        model = load_model_from_xml(tmp)
        self.sim = MjSim(model)
        self._blank_observation = self.get_observation()
        if self.view:
            self.viewer = MjViewer(self.sim)
        else:
            self.viewer = mujoco_py.MjRenderContextOffscreen(self.sim, -1)

        # self.sim_state = self.sim.get_state()
        self._get_starting_step(use_cur_pos)

    def _get_starting_step(self, use_cur_pos):
        prev_positions = {}
        for i, aname in enumerate(self.names):
            if use_cur_pos:
                prev_positions[aname] = self.get_block_info(aname)["pos"]
            self.add_block(aname, [-5 + i, -5 + i, -5])

        for aname in self.names:
            if use_cur_pos:
                tmp_pos = prev_positions[aname]
                # print(aname, tmp_pos)
            else:
                tmp_pos = self.get_random_pos(self.drop_height)
            self.add_block(aname, tmp_pos)
            for i in range(self.internal_steps_per_step):
                self.internal_step()
                if self.view:
                    self.viewer.render()
            # self.sim_state = self.sim.get_state()

    ####Env internal step functions
    def add_block(self, ablock, pos):
        #pos (x,y,z)
        self.set_block_info(ablock, {"pos": pos})
        # self.sim.set_state(self.sim_state)

    def pick_block(self, pos):
        block_name = None
        for a_block in self.names:
            if self.intersect(a_block, pos):
                block_name = a_block

        if block_name is None:
            return False

        #PICK_LOC = np.array([0, 0, 5])
        #info = {"pos":PICK_LOC}
        #self.set_block_info(block_name, info)
        # self.sim.set_state(self.sim_state)
        return block_name

    def intersect(self, a_block, pos):
        cur_pos = self.get_block_info(a_block)["pos"]
        return np.max(np.abs(cur_pos - pos)) < self.TOLERANCE

    def get_block_info(self, a_block):
        info = {}
        info["poly"] = a_block[:-2]
        info["pos"] = np.copy(self.sim.data.get_body_xpos(a_block))  #np array
        info["quat"] = np.copy(self.sim.data.get_body_xquat(a_block))
        info["vel"] = np.copy(self.sim.data.get_body_xvelp(a_block))
        info["rot_vel"] = np.copy(self.sim.data.get_body_xvelr(a_block))
        return info

    def set_block_info(self, a_block, info):
        # print(a_block, info)
        # print("Setting state: {}, {}".format(a_block, info))
        sim_state = self.sim.get_state()
        start_ind = self.sim.model.get_joint_qpos_addr(a_block)[0]
        if "pos" in info:
            sim_state.qpos[start_ind:start_ind + 3] = np.array(info["pos"])
        if "quat" in info:
            sim_state.qpos[start_ind + 3:start_ind + 7] = info["quat"]
        else:
            sim_state.qpos[start_ind + 3:start_ind + 7] = np.array(
                [1, 0, 0, 0])

        start_ind = self.sim.model.get_joint_qvel_addr(a_block)[0]
        if "vel" in info:
            sim_state.qvel[start_ind:start_ind + 3] = info["vel"]
        else:
            sim_state.qvel[start_ind:start_ind + 3] = np.zeros(3)
        if "rot_vel" in info:
            sim_state.qvel[start_ind + 3:start_ind + 6] = info["rot_vel"]
        else:
            sim_state.qvel[start_ind + 3:start_ind + 6] = np.zeros(3)
        self.sim.set_state(sim_state)

    def internal_step(self, action=None):
        ablock = False
        if action is None:
            self.sim.forward()
            self.sim.step()
        else:
            pick_place = action[:3]
            drop_place = action[3:]

            ablock = self.pick_block(pick_place)
            if (ablock):
                # print("Dropping: {} {}".format(ablock, drop_place))
                self.add_block(ablock, drop_place)
        # self.sim_state = self.sim.get_state()
        return ablock

    ####Env external step functions
    # Input: action (4) or (6)
    # Output: resultant observation after taking the action
    def step(self, action):
        action = self._pre_process_actions(action)
        ablock = self.internal_step(action)
        # print(self.get_env_info())
        #if ablock:
        for i in range(self.internal_steps_per_step):
            self.sim.forward()
            self.sim.step()
            # self.internal_step()
            if self.view:
                self.viewer.render()

        # self.give_down_vel()
        # for i in range(200):
        #     self.sim.forward()
        #     self.sim.step()
        # self.sim_state = self.sim.get_state()

        # for aname in self.names: #This looks incorrect TODO: CHECK THIS
        #     self.add_block(aname, self.get_block_info(aname)["pos"])
        return self.get_observation()

    # Input: action can either be (A) or (T, A) where we want to execute T actions in a row
    # Output: Single obs
    def try_step(self, actions):
        tmp = self.get_env_info()
        # cur_state = copy.deepcopy(self.sim.get_state())
        if len(actions.shape) == 1:
            self.step(actions)
        elif len(actions.shape) == 2:
            for action in actions:
                self.step(action)
        else:
            raise KeyError("Wrong shape for actions: {}".format(actions.shape))
        obs = self.get_observation()
        # self.sim.set_state(cur_state)
        self.set_env_info(tmp)
        return obs

    def reset(self):
        self.names = []
        self.blocks = []
        quat = [1, 0, 0, 0]
        for i in range(self.num_objects):
            poly = np.random.choice(self.polygons)
            pos = self.get_random_pos()
            pos[2] = -2 * (i + 1)
            self.add_mesh(poly, pos, quat,
                          self.get_random_rbga(self.num_colors))
        self.initialize(False)
        return self.get_observation()

    def get_observation(self):
        img = self.sim.render(
            self.img_dim, self.img_dim, camera_name="fixed"
        )  # img is upside down, values btwn 0-255 (D,D,3)
        img = img[::-1, :, :]  # flips image right side up (D,D,3)
        return np.ascontiguousarray(img)  # values btwn 0-255 (D,D,3)

    def get_segmentation_masks(self):
        cur_obs = self.get_observation()
        tmp = np.abs(cur_obs - self._blank_observation)  #(D,D,3)
        dif = np.where(tmp > 5, 1, 0).sum(2)  #(D,D,3)->(D,D)
        dif = np.where(dif != 0, 1.0, 0.0)
        block_seg = dif
        background_seg = 1 - dif
        return np.array([background_seg,
                         block_seg])  #Note: output btwn 0-1, (2,D,D)

    def get_obs_size(self):
        return [self.img_dim, self.img_dim]

    def get_actions_size(self):
        if self.include_z:
            return [6]
        else:
            return [4]

    # Inputs: actions (*,6)
    # Outputs: (*,6) if including z, (*,4) if not
    def _post_process_actions(self, actions):
        if self.include_z:
            return actions
        else:
            return actions[..., [0, 1, 3, 4]]

    # Inputs: actions (*,4), or (*,6)
    # Outputs: actions (*,6)
    def _pre_process_actions(self, actions):
        if actions.shape[-1] == 6:
            return actions

        full_actions = np.zeros(list(actions.shape)[:-1] + [6])  # (*,6)
        full_actions[..., [0, 1, 3, 4]] = actions
        full_actions[..., 2] = self.pick_height
        full_actions[..., 5] = self.drop_height
        return full_actions
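# Added sketch (separate from the class above): _pre_process_actions pads an
# (x_pick, y_pick, x_place, y_place) action out to 6 dims by filling in the fixed
# pick/drop heights. A self-contained numpy check, assuming the defaults
# pick_height=0.59 and drop_height=3 set in __init__:
import numpy as np

pick_height, drop_height = 0.59, 3.0
actions = np.array([[0.5, 2.0, -1.0, 3.5]])              # (N, 4): xy pick, xy place
full = np.zeros(list(actions.shape)[:-1] + [6])          # (N, 6)
full[..., [0, 1, 3, 4]] = actions
full[..., 2] = pick_height
full[..., 5] = drop_height
# full[0] -> [0.5, 2.0, 0.59, -1.0, 3.5, 3.0]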

    # Inputs: None
    # Outputs: Returns name of picked block
    #   If self.include z: Pick any random block
    #   Else: Picks a random block which can be picked up with the z pick set to self.pick_height
    def _get_rand_block_byz(self):
        if len(self.names) == 0:
            raise KeyError("No blocks in _get_rand_block_byz()!")
        if self.include_z:
            aname = np.random.choice(self.names)
        else:
            z_lim = self.pick_height
            tmp = [
                aname for aname in self.names
                if abs(self.get_block_info(aname)["pos"][2] -
                       z_lim) < self.TOLERANCE
            ]
            # while (len(tmp) == 0):
            #     z_lim += 0.5
            #     tmp = [aname for aname in self.names if self.get_block_info(aname)["pos"][2] <= z_lim]
            aname = np.random.choice(tmp)
            # print(tmp, aname)
        return aname

    # Input: action_type
    # Output: Single action either (6) or (4)
    def sample_action(self, action_type=None, include_noise=True):
        if len(self.names) == 1 and action_type == 'place_block':
            action_type = None

        if action_type == 'pick_block':  #pick block, place randomly
            # aname = np.random.choice(self.names)
            aname = self._get_rand_block_byz()
            pick = self.get_block_info(aname)["pos"]
            place = self.get_random_pos()
        elif action_type == 'place_block':  #pick block, place on top of existing block
            # aname = np.random.choice(self.names)
            aname = self._get_rand_block_byz()
            pick = self.get_block_info(aname)["pos"]  # + np.random.randn(3)/10
            names = copy.deepcopy(self.names)
            names.remove(aname)
            aname = np.random.choice(names)
            place = self.get_block_info(aname)[
                "pos"]  # + np.random.randn(3)/10
            place[2] += 2  # raise the drop height; each block is roughly 1 unit wide
        elif action_type == 'remove_block':
            aname = self._get_rand_block_byz()
            pick = self.get_block_info(aname)["pos"]  # + np.random.randn(3)/50
            place = [
                0, 0, -5
            ]  # Place the block under the ground to remove it from scene
        elif action_type is None:
            if self.include_z:
                pick = self.get_random_pos()
                place = self.get_random_pos()
            else:
                pick = self.get_random_pos(self.pick_height)
                place = self.get_random_pos(self.drop_height)
        else:
            raise KeyError("Wrong input action_type!")
        ac = np.array(list(pick) + list(place))
        if include_noise:
            ac += np.random.uniform(-self.TOLERANCE, self.TOLERANCE,
                                    size=6) * 1.4
        # pdb.set_trace()
        # ac[2] = 0.6
        # ac[5] = self.drop_height
        return self._post_process_actions(ac)

    #Input: mean (*), std (*)
    #Output: actions (*)
    def sample_action_gaussian(self, mean, std):
        random_a = np.random.normal(mean, std)
        # # set pick height
        # random_a[2] = 0.6
        # # set place heights
        # random_a[5] = self.drop_height
        return random_a

    # # Input: mean (*), std (*)
    # # Output: actions (*)
    # def sample_multiple_action_gaussian(self, mean, std, num_samples):
    #     #mean and std should be (T, A)
    #     random_a = np.random.normal(mean, std, [num_samples] + list(mean.shape))
    #     # set pick height
    #     random_a[:, :, 2] = 0.6
    #     # set place height
    #     random_a[:, :, 5] = self.drop_height
    #     return random_a

    def move_blocks_side(self):
        # Move blocks to either side
        z = self.drop_height
        side_pos = [[-2.2, 1.5, z], [2.2, 1.5, z], [-2.2, 3.5, z],
                    [2.2, 3.5, z]]
        # self.bounds = {'x_min':-2.5, 'x_max':2.5, 'y_min': 1.0, 'y_max' :4.0, 'z_min':0.05, 'z_max'2.2}
        place_lst = []
        for i, block in enumerate(self.names):
            place = copy.deepcopy(self.get_block_info(block)["pos"])
            place[-1] = self.drop_height
            self.add_block(block, side_pos[i])
            place_lst.append(place)
            #true_actions.append(side_pos[i] + list(place)) #Note pick & places z's might be
            # slightly
            #  off
        # sort by place height so place lowest block first

        for i in range(self.internal_steps_per_step):
            self.internal_step()
            if self.view:
                self.viewer.render()
        true_actions = []
        for i, block in enumerate(self.names):
            pick = self.get_block_info(block)["pos"]
            pick[-1] = 0.6
            place = place_lst[i]
            true_actions.append(np.concatenate([pick, place]))

        true_actions = sorted(true_actions, key=lambda x: x[5])  # place the lowest block first
        # print(true_actions)

        return self._post_process_actions(np.array(true_actions))

    def create_tower_shape(self):
        def get_valid_width_pos(width):
            num_pos = len(self.heights)
            possible = []
            for i in range(num_pos):
                valid = True
                for k in range(max(i - width, 0), min(i + width + 1, num_pos)):
                    if self.types[k] == "tetrahedron":
                        valid = False
                        break
                    if self.heights[i] < self.heights[k]:
                        valid = False
                        break
                    if self.heights[i] >= 3:
                        valid = False
                        break
                if valid:
                    possible.append(i)
            return possible

        def get_drop_pos(index):
            delta_x = 1
            y_val = 3
            left_most_x = -2.5
            return [left_most_x + index * delta_x, y_val, 4]

        self.names = []
        self.blocks = []

        self.heights = [0, 0, 0, 0, 0]
        self.types = [None] * 5
        self.check_clear_width = {
            'cube': 1,
            'horizontal_rectangle': 1,
            'tetrahedron': 1
        }
        self.add_height_width = {
            'cube': 0,
            'horizontal_rectangle': 1,
            'tetrahedron': 0
        }

        tmp_polygons = copy.deepcopy(
            self.polygons
        )  #['cube', 'horizontal_rectangle', 'tetrahedron'][:2]

        quat = [1, 0, 0, 0]
        for i in range(self.num_objects):
            poly = np.random.choice(tmp_polygons)
            tmp = get_valid_width_pos(self.check_clear_width[poly])
            if len(tmp) == 0:
                tmp_polygons.remove(poly)
                if len(tmp_polygons) == 0:
                    # print("DONE!")
                    break
                else:
                    continue

            tmp_polygons = copy.deepcopy(self.polygons)
            ind = np.random.choice(tmp)
            # print(poly, tmp, ind)
            self.update_tower_info(ind, poly)
            tmp_pos = get_drop_pos(ind)
            self.add_mesh(poly, tmp_pos, quat,
                          self.get_random_rbga(self.num_colors))
        self.num_objects = len(self.names)
        self.initialize(True)

    def update_tower_info(self, ind, poly):
        self.types[ind] = poly
        width = self.add_height_width[poly]
        new_height = self.heights[ind] + 1
        for i in range(max(ind - width, 0),
                       min(ind + width + 1, len(self.heights))):
            # print(i, new_height)
            self.heights[i] = new_height

        for i in range(1, 4):
            if self.heights[i - 1] == self.heights[
                    i + 1] and new_height == self.heights[i - 1]:
                self.heights[i] = self.heights[i - 1]

        # print(poly, ind, self.types, self.heights)

    def get_env_info(self):
        env_info = {}
        env_info["names"] = copy.deepcopy(self.names)
        env_info["blocks"] = copy.deepcopy(self.blocks)
        for i, aname in enumerate(self.names):
            info = self.get_block_info(aname)
            env_info["blocks"][i]["pos"] = copy.deepcopy(info["pos"])
            env_info["blocks"][i]["quat"] = copy.deepcopy(info["quat"])
        return env_info

    def set_env_info(self, env_info):
        self.names = env_info["names"]
        self.blocks = env_info["blocks"]
        self.initialize(True)

    def compute_accuracy(self, true_data, threshold=0.2):
        mjc_data = copy.deepcopy(true_data)

        max_err = -float('inf')
        data = self.get_env_info()

        for pred_datum in data['blocks']:
            err, mjc_match, err_pos, err_rgb = self._best_obj_match(
                pred_datum, mjc_data['blocks'])
            # del mjc_data[mjc_match]

            # print(err)
            if err > max_err:
                max_err = err
                max_pos = err_pos
                max_rgb = err_rgb

            if len(mjc_data) == 0:
                break

        correct = max_err < threshold
        return correct

    def _best_obj_match(self, pred, targs):
        def np_mse(x1, x2):
            return np.square(x1 - x2).mean()

        pos = pred['pos']
        rgb = pred['rgba']

        best_err = float('inf')
        for obj_data in targs:
            obj_name = obj_data['name']
            obj_pos = obj_data['pos']
            obj_rgb = obj_data['rgba']

            pos_err = np_mse(pos, obj_pos)
            rgb_err = np_mse(np.array(rgb), np.array(obj_rgb))
            err = pos_err + rgb_err

            if err < best_err:
                best_err = err
                best_obj = obj_name
                best_pos = pos_err
                best_rgb = rgb_err

        return best_err, best_obj, best_pos, best_rgb
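# Added sketch (not part of the scraped example above): compute_accuracy and
# _best_obj_match score a predicted scene by matching each predicted block to the
# ground-truth block with the smallest position + color MSE, then thresholding the
# worst match. A self-contained numpy version of that matching step with made-up data:
import numpy as np

def np_mse(x1, x2):
    return np.square(np.asarray(x1, dtype=float) - np.asarray(x2, dtype=float)).mean()

pred = {'pos': [0.1, 1.0, 0.6], 'rgba': [1, 0, 0, 1]}
targets = [
    {'name': 'cube_0', 'pos': [0.0, 1.0, 0.6], 'rgba': [1, 0, 0, 1]},
    {'name': 'cube_1', 'pos': [2.0, 3.0, 0.6], 'rgba': [0, 1, 0, 1]},
]
errs = [np_mse(pred['pos'], t['pos']) + np_mse(pred['rgba'], t['rgba']) for t in targets]
best_match = targets[int(np.argmin(errs))]['name']   # -> 'cube_0'
correct = min(errs) < 0.2                            # 0.2 is the example's default threshold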
Example #34
0
class MujocoEnv(gym.Env):
    """Superclass for all MuJoCo environments.
    """

    def __init__(self, model_path, frame_skip):

        if model_path.startswith("/"):
            fullpath = model_path
        else:
            fullpath = os.path.join(os.path.dirname(__file__), "assets", model_path)
        if not path.exists(fullpath):
            raise IOError("File %s does not exist" % fullpath)
        self.frame_skip = frame_skip
        self.model = load_model_from_path(fullpath)
        self.sim = MjSim(self.model)
        self.data = self.sim.data

        self.metadata = {
            'render.modes': ['human', 'rgb_array'],
            'video.frames_per_second': int(np.round(1.0 / self.dt))
        }
        self.mujoco_render_frames = False

        self.init_qpos = self.data.qpos.ravel().copy()
        self.init_qvel = self.data.qvel.ravel().copy()
        observation, _reward, done, _info = self._step(np.zeros(self.model.nu))
        assert not done
        self.obs_dim = np.sum([o.size for o in observation]) if type(observation) is tuple else observation.size

        bounds = self.model.actuator_ctrlrange.copy()
        low = bounds[:, 0]
        high = bounds[:, 1]
        self.action_space = spaces.Box(low, high)

        high = np.inf*np.ones(self.obs_dim)
        low = -high
        self.observation_space = spaces.Box(low, high)

        self._seed()

    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    # methods to override:
    # ----------------------------

    def reset_model(self):
        """
        Reset the robot degrees of freedom (qpos and qvel).
        Implement this in each subclass.
        """
        raise NotImplementedError

    def mj_viewer_setup(self):
        """
        Due to specifics of new mujoco rendering, the standard viewer cannot be used
        with this set-up. Instead we use this mujoco specific function.
        """
        pass

    def viewer_setup(self):
        """
        Does not work. Use mj_viewer_setup() instead
        """
        pass

    # -----------------------------

    def _reset(self):
        self.sim.reset()
        self.sim.forward()
        ob = self.reset_model()
        return ob

    def set_state(self, qpos, qvel):
        assert qpos.shape == (self.model.nq,) and qvel.shape == (self.model.nv,)
        state = self.sim.get_state()
        for i in range(self.model.nq):
            state.qpos[i] = qpos[i]
        for i in range(self.model.nv):
            state.qvel[i] = qvel[i]
        self.sim.set_state(state)
        self.sim.forward()

    @property
    def dt(self):
        return self.model.opt.timestep * self.frame_skip

    def do_simulation(self, ctrl, n_frames):
        for i in range(self.model.nu):
            self.sim.data.ctrl[i] = ctrl[i]
        for _ in range(n_frames):
            self.sim.step()
            if self.mujoco_render_frames is True:
                self.mj_render()

    def mj_render(self):
        try:
            self.viewer.render()
        except Exception:
            # the viewer does not exist yet: create it lazily, then render
            self.mj_viewer_setup()
            self.viewer._run_speed = 1.0
            #self.viewer._run_speed /= self.frame_skip
            self.viewer.render()

    def _get_viewer(self):
        return None

    def state_vector(self):
        state = self.sim.get_state()
        return np.concatenate([
            state.qpos.flat, state.qvel.flat])

    # -----------------------------

    def visualize_policy(self, policy, horizon=1000, num_episodes=1, mode='exploration'):
        self.mujoco_render_frames = True
        for ep in range(num_episodes):
            o = self.reset()
            d = False
            t = 0
            while t < horizon and d is False:
                a = policy.get_action(o)[0] if mode == 'exploration' else policy.get_action(o)[1]['evaluation']
                o, r, d, _ = self.step(a)
                t = t+1
        self.mujoco_render_frames = False

    def visualize_policy_offscreen(self, policy, horizon=1000,
                                   num_episodes=1,
                                   mode='exploration',
                                   save_loc='/tmp/',
                                   filename='newvid',
                                   camera_name=None):
        import skvideo.io
        for ep in range(num_episodes):
            print("Episode %d: rendering offline " % ep, end='', flush=True)
            o = self.reset()
            d = False
            t = 0
            arrs = []
            t0 = timer.time()
            while t < horizon and d is False:
                a = policy.get_action(o)[0] if mode == 'exploration' else policy.get_action(o)[1]['evaluation']
                o, r, d, _ = self.step(a)
                t = t+1
                curr_frame = self.sim.render(width=640, height=480, mode='offscreen',
                                             camera_name=camera_name, device_id=0)
                arrs.append(curr_frame[::-1,:,:])
                print(t, end=', ', flush=True)
            file_name = save_loc + filename + str(ep) + ".mp4"
            skvideo.io.vwrite( file_name, np.asarray(arrs))
            print("saved", file_name)
            t1 = timer.time()
            print("time taken = %f"% (t1-t0))
Example #35
0
class SimManager(object):
    """Object to generate sequence of images with their transforms"""
    def __init__(self,
                 filepath,
                 random_params={},
                 gpu_render=False,
                 gui=False,
                 display_data=False):
        self.model = load_model_from_path(filepath)
        self.sim = MjSim(self.model)
        self.filepath = filepath
        self.gui = gui
        self.display_data = display_data
        # Take the default random params and update anything we need
        self.RANDOM_PARAMS = {}
        self.RANDOM_PARAMS.update(random_params)

        if gpu_render:
            self.viewer = MjViewer(self.sim)
        else:
            self.viewer = None

        # Get start state of params to slightly jitter later
        self.START_GEOM_POS = self.model.geom_pos.copy()
        self.START_GEOM_SIZE = self.model.geom_size.copy()
        self.START_GEOM_QUAT = self.model.geom_quat.copy()
        self.START_BODY_POS = self.model.body_pos.copy()
        self.START_BODY_QUAT = self.model.body_quat.copy()
        self.START_MATID = self.model.geom_matid.copy()
        #self.FLOOR_OFFSET = self.model.body_pos[self.model.body_name2id('floor')]

        self.tex_modder = TextureModder(self.sim)
        self.tex_modder.whiten_materials()  # ensures materials won't impact colors
        self.cam_modder = CameraModder(self.sim)
        self.light_modder = LightModder(self.sim)
        self.start_obj_pose = self.sim.data.get_joint_qpos(
            'object:joint').copy()

    def get_data(self, num_images=10):
        """
        Returns camera intrinsics, and a sequence of images, pose transforms, and
        camera transforms
        """
        # randomize the scene
        self._rand_textures()
        self._rand_lights()
        self._rand_object()
        self._rand_walls()
        self._rand_distract()
        sequence = defaultdict(list)
        context = {}
        # object pose
        obj_pose, robot_pose = self._get_ground_truth()
        context["obj_world_pose"] = obj_pose
        context["robot_world_pose"] = robot_pose
        self._cam_step = 0
        self._cam_choices = np.array([[-1.75, 0, 2], [-1.75, 0, 1.62],
                                      [-1.3, 1.7, 1.62]])
        self._curr_cam_pos = self._cam_choices[0]
        for i in range(num_images):
            self._next_camera()
            self._forward()
            img = self._get_cam_frame()
            sequence["img"].append(img)
            cam_pos = self.cam_modder.get_pos("camera1")
            cam_quat = self.cam_modder.get_quat("camera1")
            cam_pose = np.concatenate([cam_pos, cam_quat]).astype(np.float32)
            sequence["cam_pose"].append(cam_pose)

        cam_id = self.cam_modder.get_camid("camera1")
        fovy = self.sim.model.cam_fovy[cam_id]
        width, height = 640, 480
        f = 0.5 * height / math.tan(fovy * math.pi / 360)
        camera_intrinsics = np.array(
            ((f, 0, width / 2), (0, f, height / 2), (0, 0, 1)))
        context['cam_matrix'] = camera_intrinsics
        return context, sequence
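# Added sketch (separate from the class above): get_data builds a pinhole intrinsic
# matrix from the camera's vertical field of view via f = 0.5 * height / tan(fovy * pi / 360).
# A self-contained numpy version, assuming a 60-degree fovy and a 640x480 image:
import math
import numpy as np

fovy, width, height = 60.0, 640, 480
f = 0.5 * height / math.tan(fovy * math.pi / 360)
camera_intrinsics = np.array([[f, 0, width / 2],
                              [0, f, height / 2],
                              [0, 0, 1]])
# f is about 415.7 pixels here; fx == fy since the camera is specified by fovy alone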

    def _forward(self):
        """Advances simulator a step (NECESSARY TO MAKE CAMERA AND LIGHT MODDING WORK)
        And add some visualization"""
        self.sim.forward()
        if self.viewer and self.gui:
            # Get angle of camera and display it
            quat = np.quaternion(*self.model.cam_quat[0])
            ypr = quaternion.as_euler_angles(quat) * 180 / np.pi
            cam_pos = self.model.cam_pos[0]
            cam_fovy = self.model.cam_fovy[0]
            #self.viewer.add_marker(pos=cam_pos, label="CAM: {}{}".format(cam_pos, ypr))
            #self.viewer.add_marker(pos=cam_pos, label="CAM: {}".format(ypr))
            #self.viewer.add_marker(pos=cam_pos, label="CAM: {}".format(cam_pos))
            self.viewer.add_marker(pos=cam_pos,
                                   label="FOVY: {}, CAM: {}".format(
                                       cam_fovy, cam_pos))
            self.viewer.render()

    def _get_ground_truth(self):
        """
        Return the object pose and the robot pose, each as position + quaternion
        (7 dims per pose).
        """
        robot_gid = self.sim.model.geom_name2id('base_link')
        obj_gid = self.sim.model.geom_name2id('object')
        # only x and y pos needed
        obj_world_pos = self.sim.data.geom_xpos[obj_gid]
        robot_world_pos = self.sim.data.geom_xpos[robot_gid]
        # obj_pos_in_robot_frame = (self.sim.data.geom_xpos[obj_gid] - self.sim.data.geom_xpos[robot_gid])[:2]

        # robot_quat = quaternion.as_quat_array(self.model.geom_quat[robot_gid].copy())
        obj_world_quat = self.model.geom_quat[obj_gid].copy()
        robot_world_quat = self.model.geom_quat[robot_gid].copy()
        # # want quat of obj relative to robot frame
        # # obj_q = robot_q * localrot
        # # robot_q.inv * obj_q = localrot
        # rel_quat = quaternion.as_float_array(robot_quat.inverse() * obj_quat)
        # pose = np.concatenate([obj_pos_in_robot_frame, rel_quat]).astype(np.float32)
        obj_pose = self.sim.data.get_joint_qpos('object:joint').copy()
        robot_pose = np.concatenate([robot_world_pos,
                                     robot_world_quat]).astype(np.float32)
        return obj_pose, robot_pose

    def _get_cam_frame(self, ground_truth=None):
        """Grab an image from the camera (224, 244, 3) to feed into CNN"""
        #IMAGE_NOISE_RVARIANCE = Range(0.0, 0.0001)
        cam_img = self.sim.render(
            640, 480, camera_name='camera1'
        )[::-1, :, :]  # Rendered images are upside-down.
        # make camera crop be more like kinect
        #cam_img = self.sim.render(854, 480, camera_name='camera1')[::-1, 107:-107, :] # Rendered images are upside-down.

        #image_noise_variance = sample(IMAGE_NOISE_RVARIANCE)
        #cam_img = (skimage.util.random_noise(cam_img, mode='gaussian', var=image_noise_variance) * 255).astype(np.uint8)

        if self.display_data:
            print(ground_truth)
            #label = str(ground_truth[3:6])
            display_image(cam_img, mode='preproc')  #, label)

        # cam_img = preproc_image(cam_img)
        return cam_img

    def _randomize(self):
        self._rand_textures()
        self._rand_camera()
        self._rand_lights()
        #self._rand_robot()
        self._rand_object()
        self._rand_walls()
        self._rand_distract()

    def _rand_textures(self):
        """Randomize all the textures in the scene, including the skybox"""
        bright = np.random.binomial(1, 0.5)
        for name in self.sim.model.geom_names + ('skybox', ):
            self.tex_modder.rand_all(name)
            if bright:
                self.tex_modder.brighten(name, np.random.randint(0, 150))

    def _rand_camera(self):
        """Randomize pos, orientation, and fov of camera
        real camera pos is -1.75, 0, 1.62
        FOVY:
        Kinect2 is 53.8
        ASUS is 45
        https://www.asus.com/us/3D-Sensor/Xtion_PRO_LIVE/specifications/
        http://smeenk.com/kinect-field-of-view-comparison/
        """
        # Params
        FOVY_R = Range(40, 50)
        #X = Range(-3, -1)
        #Y = Range(-1, 3)
        #Z = Range(1, 2)
        #C_R3D = Range3D(X, Y, Z)
        #cam_pos = sample_xyz(C_R3D)
        #L_R3D = rto3d([-0.1, 0.1])

        C_R3D = Range3D([-0.07, 0.07], [-0.07, 0.07], [-0.07, 0.07])
        ANG3 = Range3D([-3, 3], [-3, 3], [-3, 3])

        # Look approximately at the robot, but then randomize the orientation around that
        cam_choices = np.array([[-1.75, 0, 1.62], [-1.3, 1.7, 1.62],
                                [-1.75, 0, 2]])
        cam_pos = cam_choices[np.random.choice(len(cam_choices))]
        # cam_pos = get_real_cam_pos(FLAGS.real_data_path)
        target_id = self.model.body_name2id(FLAGS.look_at)

        cam_off = 0  #sample_xyz(L_R3D)
        target_off = 0  #sample_xyz(L_R3D)
        quat = look_at(cam_pos + cam_off,
                       self.sim.data.body_xpos[target_id] + target_off)
        quat = jitter_angle(quat, ANG3)
        #quat = jitter_quat(quat, 0.01)

        cam_pos += sample_xyz(C_R3D)

        self.cam_modder.set_quat('camera1', quat)
        self.cam_modder.set_pos('camera1', cam_pos)
        self.cam_modder.set_fovy('camera1', 60)  # hard code to wide fovy

    def _next_camera(self):
        """Randomize pos, orientation, and fov of camera
        real camera pos is -1.75, 0, 1.62
        FOVY:
        Kinect2 is 53.8
        ASUS is 45
        https://www.asus.com/us/3D-Sensor/Xtion_PRO_LIVE/specifications/
        http://smeenk.com/kinect-field-of-view-comparison/
        """
        # Params
        FOVY_R = Range(40, 50)
        #X = Range(-3, -1)
        #Y = Range(-1, 3)
        #Z = Range(1, 2)
        #C_R3D = Range3D(X, Y, Z)
        #cam_pos = sample_xyz(C_R3D)
        #L_R3D = rto3d([-0.1, 0.1])

        C_R3D = Range3D([-0.07, 0.07], [-0.07, 0.07], [-0.07, 0.07])
        ANG3 = Range3D([-3, 3], [-3, 3], [-3, 3])

        # Look approximately at the robot, but then randomize the orientation around that

        # linearly interpolate to the next camera every K steps
        K = 5
        goal_cam_pos = self._cam_choices[(self._cam_step // K) + 1]
        offset = goal_cam_pos - self._curr_cam_pos
        offset *= (self._cam_step % K) / K
        self._curr_cam_pos += offset
        cam_pos = self._curr_cam_pos
        self._cam_step += 1

        # cam_pos = cam_choices[np.random.choice(len(cam_choices))]
        # cam_pos = get_real_cam_pos(FLAGS.real_data_path)
        target_id = self.model.body_name2id(FLAGS.look_at)

        cam_off = 0  #sample_xyz(L_R3D)
        target_off = 0  #sample_xyz(L_R3D)
        quat = look_at(cam_pos + cam_off,
                       self.sim.data.body_xpos[target_id] + target_off)
        quat = jitter_angle(quat, ANG3)
        #quat = jitter_quat(quat, 0.01)

        cam_pos += sample_xyz(C_R3D)

        self.cam_modder.set_quat('camera1', quat)
        self.cam_modder.set_pos('camera1', cam_pos)
        self.cam_modder.set_fovy('camera1', 60)  # hard code to wide fovy

    def _rand_lights(self):
        """Randomize pos, direction, and lights"""
        # light stuff
        #X = Range(-1.5, 1.5)
        #Y = Range(-1.2, 1.2)
        #Z = Range(0, 2.8)
        X = Range(-1.5, -0.5)
        Y = Range(-0.6, 0.6)
        Z = Range(1.0, 1.5)
        LIGHT_R3D = Range3D(X, Y, Z)
        LIGHT_UNIF = Range3D(Range(0, 1), Range(0, 1), Range(0, 1))

        # TODO: also try not altering the light dirs and just keeping them at like -1, or [0, -0.15, -1.0]
        for i, name in enumerate(self.model.light_names):
            lid = self.model.light_name2id(name)
            # each light other than light 0 is active with probability 0.8
            if lid != 0:
                self.light_modder.set_active(name, sample([0, 1]) < 0.8)
                self.light_modder.set_dir(name, sample_light_dir())

            self.light_modder.set_pos(name, sample_xyz(LIGHT_R3D))

            #self.light_modder.set_dir(name, sample_xyz(rto3d([-1,1])))

            #self.light_modder.set_specular(name, sample_xyz(LIGHT_UNIF))
            #self.light_modder.set_diffuse(name, sample_xyz(LIGHT_UNIF))
            #self.light_modder.set_ambient(name, sample_xyz(LIGHT_UNIF))

            spec = np.array([sample(Range(0.5, 1))] * 3)
            diffuse = np.array([sample(Range(0.5, 1))] * 3)
            ambient = np.array([sample(Range(0.5, 1))] * 3)

            self.light_modder.set_specular(name, spec)
            self.light_modder.set_diffuse(name, diffuse)
            self.light_modder.set_ambient(name, ambient)
            #self.model.light_directional[lid] = sample([0,1]) < 0.2
            self.model.light_castshadow[lid] = sample([0, 1]) < 0.5

    def _rand_robot(self):
        """Randomize joint angles and jitter orientation"""
        jnt_shape = self.sim.data.qpos.shape
        self.sim.data.qpos[:] = sample_joints(self.model.jnt_range, jnt_shape)

        robot_gid = self.model.geom_name2id('robot_table_link')
        self.model.geom_quat[robot_gid] = jitter_quat(
            self.START_GEOM_QUAT[robot_gid], 0.01)

    def _rand_object(self):
        obj_gid = self.sim.model.geom_name2id('object')
        obj_bid = self.sim.model.geom_name2id('object')
        table_gid = self.model.geom_name2id('object_table')
        table_bid = self.model.body_name2id('object_table')

        obj_pose = self.start_obj_pose.copy()
        xval = self.model.geom_size[table_gid][
            0]  #- self.model.geom_size[obj_gid][0]
        yval = self.model.geom_size[table_gid][
            1]  #- self.model.geom_size[obj_gid][1]

        O_X = Range(-xval, xval)
        O_Y = Range(-yval, yval)
        O_Z = Range(0, 0)
        O_R3D = Range3D(O_X, O_Y, O_Z)

        newpos = obj_pose[:3] + sample_xyz(O_R3D)
        newquat = jitter_quat(obj_pose[3:], 0.1)
        obj_pose[:3] = newpos
        obj_pose[3:] = newquat
        self.sim.data.set_joint_qpos('object:joint', obj_pose)

        #T_X = Range(-0.1, 0.1)
        #T_Y = Range(-0.1, 0.1)
        #T_Z = Range(-0.1, 0.1)
        #T_R3D = Range3D(T_X, T_Y, T_Z)
        #self.model.body_pos[table_bid] = self.START_BODY_POS[table_bid] + sample_xyz(T_R3D)
        ## randomize orientation a wee bit
        #self.model.geom_quat[table_gid] = jitter_quat(self.START_GEOM_QUAT[table_gid], 0.01)

    def _rand_walls(self):
        wall_bids = {
            name: self.model.body_name2id(name)
            for name in ['wall_' + dir for dir in 'nesw']
        }
        window_gid = self.model.geom_name2id('west_window')
        #floor_gid = self.model.geom_name2id('floor')

        WA_X = Range(-0.2, 0.2)
        WA_Y = Range(-0.2, 0.2)
        WA_Z = Range(-0.1, 0.1)
        WA_R3D = Range3D(WA_X, WA_Y, WA_Z)

        WI_X = Range(-0.1, 0.1)
        WI_Y = Range(0, 0)
        WI_Z = Range(-0.5, 0.5)
        WI_R3D = Range3D(WI_X, WI_Y, WI_Z)

        R = Range(0, 0)
        P = Range(-10, 10)
        Y = Range(0, 0)
        RPY_R = Range3D(R, P, Y)

        #self.model.geom_quat[floor_gid] = jitter_quat(self.START_GEOM_QUAT[floor_gid], 0.01)
        #self.model.geom_pos[floor_gid] = self.START_GEOM_POS[floor_gid] + [0,0,sample(-0.1,0.1)

        self.model.geom_quat[window_gid] = sample_quat(RPY_R)
        #self.model.geom_quat[window_gid] = jitter_quat(self.START_GEOM_QUAT[window_gid], 0.01)
        self.model.geom_pos[
            window_gid] = self.START_GEOM_POS[window_gid] + sample_xyz(WI_R3D)

        for name in wall_bids:
            gid = wall_bids[name]
            self.model.body_quat[gid] = jitter_quat(self.START_BODY_QUAT[gid],
                                                    0.01)
            self.model.body_pos[gid] = self.START_BODY_POS[gid] + sample_xyz(
                WA_R3D)

    def _rand_distract(self):
        PREFIX = 'distract'
        geom_names = [
            name for name in self.model.geom_names if name.startswith(PREFIX)
        ]

        # Size range
        SX = Range(0.01, 0.5)
        SY = Range(0.01, 0.9)
        SZ = Range(0.01, 0.5)
        S3D = Range3D(SX, SY, SZ)
        # Back range
        B_PX = Range(-0.5, 2)
        B_PY = Range(-1.5, 2)
        B_PZ = Range(0, 3)
        B_P3D = Range3D(B_PX, B_PY, B_PZ)
        # Front range
        F_PX = Range(-2, -0.5)
        F_PY = Range(-2, 1)
        F_PZ = Range(0, 0.5)
        F_P3D = Range3D(F_PX, F_PY, F_PZ)

        for name in geom_names:
            gid = self.model.geom_name2id(name)
            pos_range = B_P3D if np.random.binomial(1, 0.5) else F_P3D  # back or front region

            self.model.geom_pos[gid] = sample_xyz(pos_range)
            self.model.geom_quat[gid] = random_quat()
            self.model.geom_size[gid] = sample_xyz(S3D, mode='logspace')
            self.model.geom_type[gid] = sample_geom_type()
            self.model.geom_rgba[gid][-1] = np.random.binomial(1, 0.5)

    def _set_visible(self, prefix, range_top, visible):
        """Helper function to set visibility of several objects"""
        if not visible:
            if range_top == 0:
                name = prefix
                gid = self.model.geom_name2id(name)
                self.model.geom_rgba[gid][-1] = 0.0

            for i in range(range_top):
                name = "{}{}".format(prefix, i)
                gid = self.model.geom_name2id(name)
                self.model.geom_rgba[gid][-1] = 0.0
        else:
            if range_top == 0:
                name = prefix
                gid = self.model.geom_name2id(name)
                self.model.geom_rgba[gid][-1] = 1.0

            for i in range(range_top):
                name = "{}{}".format(prefix, i)
                gid = self.model.geom_name2id(name)
                self.model.geom_rgba[gid][-1] = 1.0
class DeterministicGraspPolicy(Policy):
    def __init__(self, agentparams, policyparams):
        Policy.__init__(self)
        self.agentparams = agentparams
        self.min_lift = agentparams.get('min_z_lift', 0.05)

        self.policyparams = policyparams
        self.adim = agentparams['adim']
        self.n_actions = policyparams['nactions']

        if 'num_samples' in self.policyparams:
            self.M = self.policyparams['num_samples']
        else:
            self.M = 20  # number of CEM Samples

        if 'best_to_take' in self.policyparams:
            self.K = self.policyparams['best_to_take']
        else:
            self.K = 5  # best samples to average for next sampling

        assert self.adim >= 4, 'must have at least x, y, z + gripper actions'

        self.moveto = True
        self.drop = False
        self.grasp = False
        self.lift = False

        if 'iterations' in self.policyparams:
            self.niter = self.policyparams['iterations']
        else:
            self.niter = 10  # number of iterations
        self.imgs = []
        self.iter = 0

    def setup_CEM_model(self, t, init_model):
        if t == 0:
            if 'gen_xml' in self.agentparams:

                self.CEM_model = MjSim(
                    load_model_from_path(self.agentparams['gen_xml_fname']))
            else:
                self.CEM_model = MjSim(
                    load_model_from_path(self.agentparams['filename']))

        self.initial_qpos = init_model.data.qpos.copy()
        self.initial_qvel = init_model.data.qvel.copy()

        self.reset_CEM_model()

    def reset_CEM_model(self):
        if len(self.imgs) > 0:
            print('saving iter', self.iter, 'with frames:', len(self.imgs))
            npy_to_gif(
                self.imgs,
                os.path.join(self.agentparams['record'],
                             'iter_{}'.format(self.iter)))
            self.iter += 1

        sim_state = self.CEM_model.get_state()
        sim_state.qpos[:] = self.initial_qpos.copy()
        sim_state.qvel[:] = self.initial_qvel.copy()
        self.CEM_model.set_state(sim_state)

        self.prev_target = self.CEM_model.data.qpos[:self.adim].squeeze().copy()
        self.target = self.CEM_model.data.qpos[:self.adim].squeeze().copy()

        for _ in range(5):
            self.step_model(self.target)

        self.imgs = []

    def viewer_refresh(self):
        if self.policyparams.get('debug_viewer', False):
            large_img = self.CEM_model.render(
                640, 480, camera_name="maincam")[::-1, :, :]
            self.imgs.append(large_img)

    def perform_CEM(self, targetxy):
        self.reset_CEM_model()

        if 'object_meshes' in self.agentparams:
            targetxy = self.CEM_model.data.sensordata[:2].squeeze().copy()
        print('Beginning CEM')
        ang_dis_mean = self.policyparams['init_mean'].copy()
        ang_dis_cov = self.policyparams['init_cov'].copy()
        scores = np.zeros(self.M)
        best_score, best_ang, best_xy = -1, None, None

        for n in range(self.niter):
            ang_disp_samps = np.random.multivariate_normal(ang_dis_mean,
                                                           ang_dis_cov,
                                                           size=self.M)

            for s in range(self.M):
                #print('On iter', n, 'sample', s)
                self.reset_CEM_model()
                move = True
                drop = False
                grasp = False
                g_start = 0
                lift = False

                angle_delta = ang_disp_samps[s, 0]
                targetxy_delta = targetxy + ang_disp_samps[s, 1:]
                print('init iter')
                print(targetxy)
                print(angle_delta, targetxy_delta)
                for t in range(self.n_actions):
                    angle_action = np.zeros(self.adim)
                    cur_xy = self.CEM_model.data.qpos[:2].squeeze()

                    if move and np.linalg.norm(
                            targetxy_delta - cur_xy,
                            2) <= self.policyparams['drop_thresh']:
                        move = False
                        drop = True

                    if drop and self.CEM_model.data.qpos[2] <= -0.079:
                        drop = False
                        grasp = True
                        g_start = t
                    if grasp and t - g_start > 2:
                        grasp = False
                        lift = True
                    if move:
                        angle_action[:2] = targetxy_delta
                        angle_action[2] = self.agentparams['ztarget']
                        angle_action[3] = angle_delta
                        angle_action[4] = -100
                    elif drop:
                        angle_action[:2] = targetxy_delta
                        angle_action[2] = -0.08
                        angle_action[3] = angle_delta
                        angle_action[4] = -100
                    elif grasp:
                        angle_action[:2] = targetxy_delta
                        angle_action[2] = -0.08
                        angle_action[3] = angle_delta
                        angle_action[4] = 21
                    elif lift:
                        angle_action[:2] = targetxy_delta
                        angle_action[2] = self.agentparams['ztarget']
                        angle_action[3] = angle_delta
                        angle_action[4] = 21

                    self.step_model(angle_action)
                # print 'final z', self.CEM_model.data.qpos[8].squeeze(), 'with angle', angle_samps[s]

                if 'object_meshes' in self.agentparams:
                    obj_z = self.CEM_model.data.sensordata[2].squeeze()
                else:
                    obj_z = self.CEM_model.data.qpos[8].squeeze()

                print('obj_z at iter', n * self.M + s, 'is', obj_z)

                scores[s] = obj_z

                if ('stop_iter_thresh' in self.policyparams
                        and scores[s] > self.policyparams['stop_iter_thresh']):
                    return ang_disp_samps[s, 0], ang_disp_samps[s, 1:]
                # print 'score',scores[s]

            best_scores = np.argsort(-scores)[:self.K]

            if scores[best_scores[0]] > best_score or best_ang is None:

                #print('best', scores[best_scores[0]])

                best_score = scores[best_scores[0]]
                best_ang = ang_disp_samps[best_scores[0], 0]
                best_xy = ang_disp_samps[best_scores[0], 1:]

            ang_dis_mean = np.mean(ang_disp_samps[best_scores, :], axis=0)
            ang_dis_cov = np.cov(ang_disp_samps[best_scores, :].T)

        return best_ang, best_xy
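        # Added summary (not in the original code): perform_CEM is a standard
        # cross-entropy-method loop -- sample M (angle, dx, dy) perturbations
        # from a Gaussian, roll each one out in the copied simulator, score it
        # by the object's final height, then refit the Gaussian to the K best
        # samples and repeat for niter iterations.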

    def step_model(self, input_actions):
        pos_clip = self.agentparams['targetpos_clip']

        self.prev_target = self.target.copy()
        self.target = input_actions.copy()
        self.target = np.clip(self.target, pos_clip[0], pos_clip[1])
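        # Added note: the loop below moves the controller setpoint linearly from
        # the previous target to the new (clipped) target over `substeps` steps.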

        for s in range(self.agentparams['substeps']):
            step_action = s / float(self.agentparams['substeps']) * (
                self.target - self.prev_target) + self.prev_target
            self.CEM_model.data.ctrl[:] = step_action
            self.CEM_model.step()

        self.viewer_refresh()

        #print "end", self.CEM_model.data.qpos[:4].squeeze()

    def act(self,
            traj,
            t,
            init_model=None,
            goal_object_pose=None,
            hyperparams=None,
            goal_image=None):
        """
        Scripted pick->place->wiggle trajectory. There's probably a better way to script this
        but booleans will suffice for now.
        """
        self.setup_CEM_model(t, init_model)

        if t == 0:
            self.moveto = True
            self.drop = False
            self.lift = False
            self.grasp = False
            self.second_moveto = False
            self.final_drop = False
            self.wiggle = False
            self.switchTime = 0
            print('start pose', traj.Object_pose[t, 0, :3])
            self.targetxy = traj.Object_pose[t, 0, :2]
            self.angle, self.disp = self.perform_CEM(self.targetxy)
            print('best angle', self.angle, 'best target', self.targetxy)
            self.targetxy += self.disp
            traj.desig_pos = np.zeros((2, 2))
            traj.desig_pos[0] = self.targetxy.copy()

        if self.lift and traj.X_full[t, 2] >= self.min_lift:
            self.lift = False

            if traj.Object_full_pose[t, 0, 2] > -0.01:  # lift occurred
                self.second_moveto = True
                self.targetxy = np.random.uniform(-0.2, 0.2, size=2)
                print("LIFTED OBJECT!")
                print('dropping at', self.targetxy)
                traj.desig_pos[1] = self.targetxy.copy()
            else:
                self.wiggle = True

        if self.grasp and self.switchTime > 0:
            print('lifting at time', t, '!', 'have z', traj.X_full[t, 2])

            self.grasp = False
            self.lift = True

        if self.drop and (traj.X_full[t, 2] <= -0.079 or self.switchTime >= 2):
            print('grasping at time', t, '!', 'have z', traj.X_full[t, 2])
            self.drop = False
            self.grasp = True
            self.switchTime = 0

        if self.moveto and (np.linalg.norm(traj.X_full[t, :2] - self.targetxy, 2)
                            <= self.policyparams['drop_thresh']):
            if self.switchTime > 0:
                print('stopping moveto at time', t, '!')
                print(traj.X_full[t, :2], self.targetxy)
                self.moveto = False
                self.drop = True
                self.switchTime = 0
            else:
                self.switchTime += 1

        if self.second_moveto and (np.linalg.norm(traj.X_full[t, :2] - self.targetxy, 2)
                                   <= self.policyparams['drop_thresh']):
            self.second_moveto = False
            self.final_drop = True
            self.switchTime = 0

        actions = np.zeros(self.adim)
        if self.moveto or self.second_moveto:
            delta = np.zeros(3)
            delta[:2] = self.targetxy - traj.target_qpos[t, :2]

            if 'xyz_std' in self.policyparams:
                delta += self.policyparams['xyz_std'] * np.random.normal(
                    size=3)

            norm = np.sqrt(np.sum(np.square(delta)))
            if norm > self.policyparams['max_norm']:
                delta *= self.policyparams['max_norm'] / norm

            actions[:3] = traj.target_qpos[t, :3] + delta
            actions[2] = self.agentparams['ztarget']
            actions[3] = self.angle

            if self.moveto:
                actions[-1] = -1
            else:
                actions[-1] = 1

            self.switchTime += 1

        elif self.drop:
            actions[:2] = self.targetxy
            actions[2] = -0.08
            actions[3] = self.angle
            actions[-1] = -1
            self.switchTime += 1

        elif self.lift:
            actions[:2] = self.targetxy
            actions[2] = self.agentparams['ztarget']
            actions[3] = self.angle
            actions[-1] = 1

        elif self.grasp:
            actions[:2] = self.targetxy
            actions[2] = -0.08
            actions[3] = self.angle
            actions[-1] = 1
            self.switchTime += 1
        elif self.final_drop:

            if self.switchTime > 0:
                print('opening')
                actions[:2] = self.targetxy
                actions[2] = -0.08
                actions[3] = self.angle
                actions[-1] = -1
                self.switchTime += 1

            if self.switchTime > 1:
                print('up')
                actions[:2] = self.targetxy
                actions[2] = self.agentparams['ztarget'] + np.random.uniform(
                    -0.03, 0.05)
                actions[3] = self.angle + np.random.uniform(
                    -np.pi / 4., np.pi / 4.)
                actions[-1] = -1
                self.final_drop = False
                self.wiggle = True
                self.switchTime = 0
            else:
                actions[:2] = self.targetxy
                actions[2] = -0.08
                actions[3] = self.angle
                actions[-1] = -1
                self.switchTime += 1

        elif self.wiggle:
            delta_vec = np.random.normal(size=2)
            norm = np.sqrt(np.sum(np.square(delta_vec)))
            if norm > self.policyparams['max_norm']:
                delta_vec *= self.policyparams['max_norm'] / norm
            actions[:2] = np.clip(traj.target_qpos[t, :2] + delta_vec, -0.15,
                                  0.15)
            delta_z = np.random.uniform(-0.08 - traj.target_qpos[t, 2],
                                        0.3 - traj.target_qpos[t, 2])
            actions[2] = traj.target_qpos[t, 2] + delta_z
            actions[3] = traj.target_qpos[t, 3] + np.random.uniform(-0.1, 0.1)
            if np.random.uniform() > 0.5:
                actions[4] = -1
            else:
                actions[4] = 1

        if 'angle_std' in self.policyparams:
            actions[3] += self.policyparams['angle_std'] * np.random.normal()

        return actions - traj.target_qpos[t, :] * traj.mask_rel
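
# --- Added sketch (not part of the original example) -------------------------
# A minimal, self-contained cross-entropy-method loop on a toy objective,
# mirroring the sample -> score -> refit-to-elites pattern that perform_CEM
# uses above.  Only numpy is assumed; the target point is arbitrary.
import numpy as np

def cem_toy(niter=10, M=64, K=8):
    mean, cov = np.zeros(3), np.eye(3)          # e.g. (angle, dx, dy)
    best_x, best_score = None, -np.inf
    for _ in range(niter):
        samples = np.random.multivariate_normal(mean, cov, size=M)
        # toy score: negative distance to a fixed target point
        scores = -np.linalg.norm(samples - np.array([0.5, -0.2, 0.1]), axis=1)
        if scores.max() > best_score:
            best_score, best_x = scores.max(), samples[scores.argmax()]
        # keep the K elites and refit the sampling distribution to them
        elites = samples[np.argsort(-scores)[:K]]
        mean, cov = elites.mean(axis=0), np.cov(elites.T)
    return best_x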
Example #37
0
class MujocoEnv(gym.Env):
    """Superclass for all MuJoCo environments.
    """

    def __init__(self, model_path, frame_skip):

        if model_path.startswith("/"):
            fullpath = model_path
        else:
            fullpath = os.path.join(os.path.dirname(__file__), "assets", model_path)
        if not path.exists(fullpath):
            raise IOError("File %s does not exist" % fullpath)
        self.frame_skip = frame_skip
        self.model = load_model_from_path(fullpath)
        self.sim = MjSim(self.model)
        self.data = self.sim.data

        self.metadata = {
            'render.modes': ['human', 'rgb_array'],
            'video.frames_per_second': int(np.round(1.0 / self.dt))
        }
        self.mujoco_render_frames = False

        self.init_qpos = self.data.qpos.ravel().copy()
        self.init_qvel = self.data.qvel.ravel().copy()
        observation, _reward, done, _info = self._step(np.zeros(self.model.nu))
        assert not done
        self.obs_dim = np.sum([o.size for o in observation]) if type(observation) is tuple else observation.size

        bounds = self.model.actuator_ctrlrange.copy()
        low = bounds[:, 0]
        high = bounds[:, 1]
        self.action_space = spaces.Box(low, high)

        high = np.inf*np.ones(self.obs_dim)
        low = -high
        self.observation_space = spaces.Box(low, high)

        self._seed()

    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    # methods to override:
    # ----------------------------

    def reset_model(self):
        """
        Reset the robot degrees of freedom (qpos and qvel).
        Implement this in each subclass.
        """
        raise NotImplementedError

    def mj_viewer_setup(self):
        """
        Due to specifics of the new MuJoCo rendering, the standard viewer cannot be
        used with this set-up. Instead we use this MuJoCo-specific function.
        """
        pass

    def viewer_setup(self):
        """
        Does not work. Use mj_viewer_setup() instead
        """
        pass

    def evaluate_success(self, paths, logger=None):
        """
        Log various success metrics calculated based on input paths into the logger
        """
        pass

    # -----------------------------

    def _reset(self):
        self.sim.reset()
        self.sim.forward()
        ob = self.reset_model()
        return ob

    def set_state(self, qpos, qvel):
        assert qpos.shape == (self.model.nq,) and qvel.shape == (self.model.nv,)
        state = self.sim.get_state()
        for i in range(self.model.nq):
            state.qpos[i] = qpos[i]
        for i in range(self.model.nv):
            state.qvel[i] = qvel[i]
        self.sim.set_state(state)
        self.sim.forward()

    @property
    def dt(self):
        return self.model.opt.timestep * self.frame_skip
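        # Added note: e.g. with MuJoCo's default timestep of 0.002 s and
        # frame_skip=5 this gives dt = 0.01 s, i.e. a 100 Hz control rate.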

    def do_simulation(self, ctrl, n_frames):
        for i in range(self.model.nu):
            self.sim.data.ctrl[i] = ctrl[i]
        for _ in range(n_frames):
            self.sim.step()
            if self.mujoco_render_frames is True:
                self.mj_render()

    def mj_render(self):
        try:
            self.viewer.render()
        except:
            self.mj_viewer_setup()
            self.viewer._run_speed = 0.5
            #self.viewer._run_speed /= self.frame_skip
            self.viewer.render()

    def _get_viewer(self):
        return None

    def state_vector(self):
        state = self.sim.get_state()
        return np.concatenate([
            state.qpos.flat, state.qvel.flat])

    # -----------------------------

    def visualize_policy(self, policy, horizon=1000, num_episodes=1, mode='exploration'):
        self.mujoco_render_frames = True
        for ep in range(num_episodes):
            o = self.reset()
            d = False
            t = 0
            while t < horizon and d is False:
                a = policy.get_action(o)[0] if mode == 'exploration' else policy.get_action(o)[1]['evaluation']
                o, r, d, _ = self.step(a)
                t = t+1
        self.mujoco_render_frames = False

    def visualize_policy_offscreen(self, policy, horizon=1000,
                                   num_episodes=1,
                                   frame_size=(640,480),
                                   mode='exploration',
                                   save_loc='/tmp/',
                                   filename='newvid',
                                   camera_name=None):
        import skvideo.io
        for ep in range(num_episodes):
            print("Episode %d: rendering offline " % ep, end='', flush=True)
            o = self.reset()
            d = False
            t = 0
            arrs = []
            t0 = timer.time()
            while t < horizon and d is False:
                a = policy.get_action(o)[0] if mode == 'exploration' else policy.get_action(o)[1]['evaluation']
                o, r, d, _ = self.step(a)
                t = t+1
                curr_frame = self.sim.render(width=frame_size[0], height=frame_size[1],
                                             mode='offscreen', camera_name=camera_name, device_id=0)
                arrs.append(curr_frame[::-1,:,:])
                print(t, end=', ', flush=True)
            file_name = save_loc + filename + str(ep) + ".mp4"
            skvideo.io.vwrite( file_name, np.asarray(arrs))
            print("saved", file_name)
            t1 = timer.time()
            print("time taken = %f"% (t1-t0))
Example #38
0
</mujoco>
"""

model = load_model_from_xml(MODEL_XML)
sim = MjSim(model)

print("Nicely exposed function:\n")
print(sim.model.get_xml())

print("\nversus MuJoCo internals:\n\n")

functions.mj_saveLastXML("/tmp/saved.xml", model, "", 0)
with open("/tmp/saved.xml", "r") as f:
    print(f.read())

sim.render(100, 100)

modelpos = np.zeros(3)
modelquat = np.zeros(4)
roompos = np.ones(3)
roomquat = np.array([1., 0., 1., 0.])

functions.mjv_room2model(modelpos, modelquat, roompos, roomquat,
                         sim.render_contexts[0].scn)

print("\n\nAnother internal function, mjv_room2model:")
print("modelpos = %s, modelquat = %s" % (str(modelpos), str(modelquat)))

res = np.zeros(9)
functions.mju_quat2Mat(res, roomquat)
print("\n\nAnother internal function, mju_quat2Mat:\n%s" % res)
Example #39
0
                pixcount = pixcount + 1
            #else :
            #grayimg[i][j] = 0
        position = np.array([pixwx / pixcount, pixwy / pixcount])
    return position


while True:
    sim.set_state(sim_state)
    #for j in range(0,6) :
    sim.data.ctrl[:] = 0.0
    #for i in range (0, 2000) :
    #sim.data.ctrl[j] = -3.14159 + (i * 6.283)/2000.0
    sim.step()
    viewer.render()
    img = sim.render(256, 256, camera_name="cam3", depth=True)
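    # Added note: with depth=True, sim.render returns an (rgb, depth) tuple, so
    # img[0] is the color image and img[1] the raw depth buffer, whose values
    # lie in (0, 1] and still need rescaling to useful units (as attempted in
    # the commented-out block below).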
    print(type(img))
    print(img[1])
    print(np.max(img[1]))
    print(np.min(img[1]))
    kk = img[1]
    #kk =
    #kk * 1600.0
    '''
    for i in range (0,256):
        for j in range(0,256):
            #if kk[i][j] > 256:
            val =  (kk[i][j] - 0.5945638) /(0.99736613 -0.5945638) *1500 
            if val > 255:
                  val =255
            kk[i][j] = val 
Example #40
0
    def take_images(self, filename, obj, writer, img_idx=0, debug=False):
        model = load_model_from_path(filename)
        sim = MjSim(model)
        modder = TextureModder(sim)
        # viewer = MjViewer(sim)  # this line has caused a great deal of pain; keep it commented out.

        embedding = np.append(obj.type, obj.geom.reshape(-1))
        if obj.type == 4 or obj.type == 5:
            # MULTI CABINET: get double the params.
            axis1, door1 = transform_param(obj.params[0][0],obj.params[0][1], obj)
            axis2, door2 = transform_param(obj.params[1][0],obj.params[1][1], obj)
            axes = np.append(axis1,axis2)
            doors = np.append(door1,door2)
            params = np.append(axes,doors)
            set_two_door_control(sim, 'cabinet2' if obj.type == 4 else 'refrigerator')
            n_qpos_variables = 2

        else:
            n_qpos_variables = 1
            if obj.type == 1:
                sim.data.ctrl[0] = 0.05

            elif obj.geom[3] == 1:
                sim.data.ctrl[0] = -0.2

            else:
                sim.data.ctrl[0] = 0.2

            params = get_cam_relative_params2(obj) # if 1DoF, params is length 10. If 2DoF, params is length 20.

        embedding_and_params = np.concatenate((embedding, params, obj.pose, obj.rotation))
        # print('nqpos', n_qpos_variables)
        # print(self.img_idx, obj.pose)
        # print(embedding_and_params.shape)
        t = 0
        while t<4000:
            sim.forward()
            sim.step()

            if t%250==0:

                #########################
                IMG_WIDTH = calibrations.sim_width
                IMG_HEIGHT = calibrations.sim_height
                #########################

                img, depth = sim.render(IMG_WIDTH, IMG_HEIGHT, camera_name='external_camera_0', depth=True)
                depth = vertical_flip(depth)
                real_depth = buffer_to_real(depth, 12.0, 0.1)
                norm_depth = real_depth / 12.0
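                # Added note: buffer_to_real presumably converts the raw z-buffer
                # to metric depth using far/near planes of 12.0 m and 0.1 m, and
                # dividing by 12.0 then normalizes it to roughly [0, 1].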

                if self.masked:
                    # remove background
                    mask = norm_depth > 0.99
                    norm_depth = (1-mask)*norm_depth

                if self.debugging:
                    # save image to disk for visualization
                    # img = cv2.resize(img, (IMG_WIDTH,IMG_HEIGHT))

                    img = vertical_flip(img)
                    img = white_bg(img)
                    imgfname = os.path.join(self.savedir, 'img'+str(self.img_idx).zfill(6)+'.png')
                    depth_imgfname = os.path.join(self.savedir, 'depth_img'+str(self.img_idx).zfill(6)+'.png')
                    integer_depth = norm_depth * 255

                    cv2.imwrite(imgfname, img)
                    cv2.imwrite(depth_imgfname, integer_depth)

                    # noise_img = sp_noise(img,0.1)
                    # cv2.imwrite(imgfname, noise_img)

                    # noise_integer_depth = sp_noise(integer_depth,0.1)
                    # cv2.imwrite(depth_imgfname, noise_integer_depth)
                 

                # if IMG_WIDTH != 192 or IMG_HEIGHT != 108:
                #     depth = cv2.resize(norm_depth, (192,108))

                depthfname = os.path.join(self.savedir,'depth'+str(self.img_idx).zfill(6) + '.pt')
                torch.save(torch.tensor(norm_depth.copy()), depthfname)
                row = np.append(embedding_and_params, sim.data.qpos[:n_qpos_variables])
                # print(row.shape)
                writer.writerow(row)
                self.img_idx += 1

            t += 1
Example #41
0
class JuggleEnv:
    def __init__(self):
        self.control_freq: float = 50.0
        self.control_timestep: float = 1.0 / self.control_freq
        self.viewer = None
        self.horizon = 1000
        self.target = np.array([0.8, 0.0, 1.9])

        # load model
        self.robot: Robot = None
        self.arena: Arena = None
        self.pingpong: MujocoGeneratedObject = None
        self.model: MujocoWorldBase = None
        self._load_model()

        # initialize simulation
        self.mjpy_model = None
        self.sim: MjSim = None
        self.model_timestep: float = 0.0
        self._initialize_sim()

        # reset robot, object and internal variables
        self.cur_time: float = 0.0
        self.timestep: int = 0
        self.done: bool = False
        self._pingpong_body_id: int = -1
        self._paddle_body_id: int = -1
        self._reset_internel()

        # internal variables for scoring
        self._below_plane = False
        self.plane_height = 1.5

    def _load_model(self):
        # Load the desired controller's default config as a dict
        controller_config = load_controller_config(
            default_controller="JOINT_VELOCITY")
        controller_config["output_max"] = 1.0
        controller_config["output_min"] = -1.0
        robot_noise = {"magnitude": [0.05] * 7, "type": "gaussian"}
        self.robot = SingleArm(
            robot_type="IIWA",
            idn=0,
            controller_config=controller_config,
            initial_qpos=[0.0, 0.7, 0.0, -1.4, 0.0, -0.56, 0.0],
            initialization_noise=robot_noise,
            gripper_type="PaddleGripper",
            gripper_visualization=True,
            control_freq=self.control_freq)
        self.robot.load_model()
        self.robot.robot_model.set_base_xpos([0, 0, 0])

        self.arena = EmptyArena()
        self.arena.set_origin([0.8, 0, 0])

        self.pingpong = BallObject(name="pingpong",
                                   size=[0.02],
                                   rgba=[0.8, 0.8, 0, 1],
                                   solref=[0.1, 0.03],
                                   solimp=[0, 0, 1],
                                   density=100)
        pingpong_model = self.pingpong.get_collision()
        pingpong_model.append(
            new_joint(name="pingpong_free_joint", type="free"))
        pingpong_model.set("pos", "0.8 0 2.0")

        # merge into one
        self.model = MujocoWorldBase()
        self.model.merge(self.robot.robot_model)
        self.model.merge(self.arena)
        self.model.worldbody.append(pingpong_model)

    def _initialize_sim(self):
        # if we have an xml string, use that to create the sim. Otherwise, use the local model
        self.mjpy_model = self.model.get_model(mode="mujoco_py")

        # Create the simulation instance and run a single step to make sure changes have propagated through sim state
        self.sim = MjSim(self.mjpy_model)
        self.sim.step()
        self.robot.reset_sim(self.sim)
        self.model_timestep = self.sim.model.opt.timestep

    def _reset_internel(self):
        # reset robot
        self.robot.setup_references()
        self.robot.reset(deterministic=False)

        # reset pingpong
        pingpong_pos = self.target + np.random.rand(3) * 0.08 - 0.04
        pingpong_quat = np.array([1.0, 0.0, 0.0, 0.0])
        self.sim.data.set_joint_qpos(
            "pingpong_free_joint",
            np.concatenate([pingpong_pos, pingpong_quat]))

        # get handle for important parts
        self._pingpong_body_id = self.sim.model.body_name2id("pingpong")
        self._paddle_body_id = self.sim.model.body_name2id(
            "gripper0_paddle_body")

        # Setup sim time based on control frequency
        self.cur_time = 0
        self.timestep = 0
        self.done = False

    def reset(self):
        self.sim.reset()
        self._reset_internel()
        self.sim.forward()
        return self._get_observation()

    def _get_observation(self):
        di = OrderedDict()

        # get robot observation
        di = self.robot.get_observations(di)

        # get pingpong observation
        pingpong_pos = np.array(
            self.sim.data.body_xpos[self._pingpong_body_id])
        di["pingpong_pos"] = pingpong_pos
        return di

    def step(self, action: np.ndarray):
        if self.done:
            raise ValueError("executing action in terminated episode")

        policy_step = True
        score = 0.0
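        # Added note: each policy action is held for one control period; the
        # inner loop advances the physics int(control_timestep / model_timestep)
        # times, so the policy effectively runs at control_freq (50 Hz)
        # regardless of the MuJoCo timestep.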
        for _ in range(int(self.control_timestep / self.model_timestep)):
            self.sim.forward()
            self.robot.control(action=action, policy_step=policy_step)
            # self.sim.data.ctrl[:] = action*5.0
            self.sim.step()
            policy_step = False
            # check whether the ball has crossed the scoring plane from below
            h = self.sim.data.body_xpos[self._pingpong_body_id][2]
            self._below_plane |= h < self.plane_height
            if self._below_plane and h > self.plane_height:
                score = 1.0
                self._below_plane = False

        self.timestep += 1
        self.cur_time += self.control_timestep
        observation = self._get_observation()
        dist_xy = np.linalg.norm(
            (observation["robot0_eef_pos"] - observation["pingpong_pos"])[:2])
        # paddle_height = observation["robot0_eef_pos"][2]
        self.done = self.timestep >= self.horizon or dist_xy > 0.2
        reward = score  # + 0 * (0.2 - dist_xy)
        return observation, reward, self.done, {}

    def render(self, mode="human"):
        if mode == "human":
            self._get_viewer().render()
        elif mode == "rgb_array":
            img = self.sim.render(1920, 1080)
            return img[::-1, :, ::-1]

    def _get_viewer(self):
        if self.viewer is None:
            self.viewer = MjViewer(self.sim)
            self.viewer.vopt.geomgroup[0] = 0
            self.viewer._hide_overlay = True
        return self.viewer

    def close(self):
        self._destroy_viewer()

    def _destroy_viewer(self):
        if self.viewer is not None:
            glfw.destroy_window(self.viewer.window)
            self.viewer = None

    def seed(self):
        pass
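
# --- Added sketch (not part of the original example) -------------------------
# Minimal random-action rollout against JuggleEnv, to show the reset/step/render
# loop implied by the class above.  The 7-dimensional action (IIWA joint
# velocities under the JOINT_VELOCITY controller) is an assumption.
if __name__ == "__main__":
    env = JuggleEnv()
    obs = env.reset()
    done = False
    while not done:
        action = np.random.uniform(-1.0, 1.0, size=7)
        obs, reward, done, info = env.step(action)
        env.render(mode="human")
    env.close()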
Example #42
0
</mujoco>
"""

model = load_model_from_xml(MODEL_XML)
sim = MjSim(model)

print("Nicely exposed function:\n")
print(sim.model.get_xml())

print("\nversus MuJoCo internals:\n\n")

functions.mj_saveLastXML("/tmp/saved.xml", model, "", 0)
with open("/tmp/saved.xml", "r") as f:
    print(f.read())

sim.render(100, 100)

modelpos = np.zeros(3)
modelquat = np.zeros(4)
roompos = np.ones(3)
roomquat = np.array([1., 0., 1., 0.])

functions.mjv_room2model(modelpos, modelquat, roompos,
                         roomquat, sim.render_contexts[0].scn)

print("\n\nAnother internal function, mjv_room2model:")
print("modelpos = %s, modelquat = %s" % (str(modelpos), str(modelquat)))


res = np.zeros(9)
functions.mju_quat2Mat(res, roomquat)