def main():
    """Open FloorPlan1 in a high-quality Unity window and wait for Enter.

    The window size comes from the module-level ``player_screen_height`` and
    ``player_screen_width`` names, which are defined outside this function.
    The Unity process is stopped before returning, even if a step fails.
    """
    wait = True
    # Set z to this value to teleport an object off-screen.  Should work for the
    # objects we're using; if an object is still visible, decrease the number.
    offscreen_z = -3

    controller = ai2thor.controller.Controller(quality='High')
    controller.start(player_screen_height=player_screen_height,
                     player_screen_width=player_screen_width)
    try:
        controller.reset('FloorPlan1')
        # The key is spelled ``gridSize`` everywhere else in this file; the
        # lowercase ``gridsize`` spelling used here before did not match.
        controller.step(dict(action='Initialize', gridSize=0.25))

        # move unnecessary objects offscreen
        # controller.step(dict(action='TeleportObject', objectId=objectId_dict['apple'], z=offscreen_z))
        # controller.step(dict(action='TeleportObject', objectId=objectId_dict['bread'], z=offscreen_z))
        # controller.step(dict(action='TeleportObject', objectId=objectId_dict['butter_knife'], z=offscreen_z))

        # controller.step(dict(action='TeleportFull', x=0, y=1, z=-1.75, rotation=180, horizon=0))

        if wait:
            input("press enter to close...")
    finally:
        # Close the Unity window once the user is done (or on error).
        controller.stop()
def dump_scene(scene_name,
               base_dir,
               renderObjectImage=False,
               renderDepthImage=False,
               renderClassImage=False):
    """Start a 448x448 controller on ``scene_name`` and dump it to ``base_dir``.

    Args:
        scene_name: Scene passed to ``controller.reset``.
        base_dir: Output location forwarded to ``dump_scene_controller``.
        renderObjectImage: Forwarded to the ``Initialize`` action.
        renderDepthImage: Forwarded to the ``Initialize`` action.
        renderClassImage: Forwarded to the ``Initialize`` action.

    The controller is stopped even when initialization or the dump raises.
    """
    controller = ai2thor.controller.Controller()
    controller.start(height=448, width=448)
    try:
        controller.reset(scene_name)
        controller.step(
            dict(action='Initialize',
                 fieldOfView=90,
                 gridSize=0.25,
                 renderDepthImage=renderDepthImage,
                 renderObjectImage=renderObjectImage,
                 renderClassImage=renderClassImage))
        dump_scene_controller(base_dir, controller)
    finally:
        # Never leave the Unity process running after a failed dump.
        controller.stop()
def single_worker(num_envs, steps_per_proc, actions=actions):
    """Benchmark ``num_envs`` controllers stepped in turn from one process.

    Args:
        num_envs: Number of controllers to start.
        steps_per_proc: Number of rounds; each round steps every controller once.
        actions: Sequence of action names to sample from uniformly at random.

    Prints the measured throughput as ``FPS:<value>``.  Every controller that
    was started is stopped, even if a step raises part-way through.
    """
    controllers = []
    try:
        for _ in range(num_envs):
            controller = ai2thor.controller.Controller()
            controller.start()
            # Register right after start so the cleanup below also covers a
            # controller whose Initialize step fails.
            controllers.append(controller)
            controller.step(dict(action='Initialize', gridSize=0.25))

        count = len(actions)
        start = time.time()
        for _ in range(steps_per_proc):
            int_actions = np.random.randint(count, size=num_envs)
            for c, a in zip(controllers, int_actions):
                c.step(dict(action=actions[a]))
        total_time = time.time() - start
        print(f"FPS:{steps_per_proc *num_envs / total_time:.2f}")
    finally:
        for c in controllers:
            c.stop()
def dump_scene(
    scene_name,
    base_dir,
    renderInstanceSegmentation=False,
    renderDepthImage=False,
    renderSemanticSegmentation=False,
):
    """Start a 448x448 controller on ``scene_name`` and dump it to ``base_dir``.

    Args:
        scene_name: Scene passed to ``controller.reset``.
        base_dir: Output location forwarded to ``dump_scene_controller``.
        renderInstanceSegmentation: Forwarded to the ``Initialize`` action.
        renderDepthImage: Forwarded to the ``Initialize`` action.
        renderSemanticSegmentation: Forwarded to the ``Initialize`` action.

    The controller is stopped even when initialization or the dump raises.
    """
    controller = ai2thor.controller.Controller()
    controller.start(height=448, width=448)
    try:
        controller.reset(scene_name)
        controller.step(
            dict(
                action="Initialize",
                fieldOfView=90,
                gridSize=0.25,
                renderDepthImage=renderDepthImage,
                renderInstanceSegmentation=renderInstanceSegmentation,
                renderSemanticSegmentation=renderSemanticSegmentation,
            ))
        dump_scene_controller(base_dir, controller)
    finally:
        # Never leave the Unity process running after a failed dump.
        controller.stop()
def worker(steps_per_proc, sync_event, queue, gpu_id=0, actions=actions):
    """Run one benchmark worker: start a controller and take random steps.

    Args:
        steps_per_proc: Number of random actions to execute.
        sync_event: Event used as a barrier with the parent process.
        queue: Queue on which ``1`` is put after initialization and again
            after the last step.
        gpu_id: X display number written to ``DISPLAY`` (as ``:<gpu_id>``).
        actions: Sequence of action names to sample from.
    """
    os.environ['DISPLAY'] = f":{gpu_id}"

    env = ai2thor.controller.Controller()
    env.start()
    env.step({'action': 'Initialize', 'gridSize': 0.25})
    print("Worker with pid:", os.getpid(), "is intialized")

    # Seed with the pid so each worker process draws its own random sequence.
    np.random.seed(os.getpid())

    # Inform the main process that initialization succeeded, then block until
    # it releases all workers.
    queue.put(1)
    sync_event.wait()

    num_actions = len(actions)
    for _ in range(steps_per_proc):
        chosen = actions[np.random.randint(num_actions)]
        env.step({'action': chosen})

    # Report completion, then wait for the event to be set again before
    # tearing the controller down.
    queue.put(1)
    print("Worker with pid:", os.getpid(), " funished job")
    sync_event.clear()
    sync_event.wait()
    env.stop()
def save_layout(*layouts, wait=False, save_all=False):
    """Render layouts in one shared Unity window and save an image of each.

    Args:
        *layouts: Layouts to save; ignored when ``save_all`` is true.
        wait: Whether to wait for user input before moving on to the next
            layout.
        save_all: Save every key of the module-level ``layout_dict`` instead
            of ``layouts``.

    The controller is stopped when all layouts are saved, or on error.
    """
    controller = ai2thor.controller.Controller(quality='High')
    controller.start(player_screen_height=800, player_screen_width=1200)
    try:
        controller.reset('FloorPlan1')
        # ``gridSize`` is the spelling used by the other Initialize calls in
        # this file; this call previously used lowercase ``gridsize``.
        controller.step(dict(action='Initialize', gridSize=0.25))

        selected = layout_dict if save_all else layouts
        for layout in selected:
            event = view_layout(layout, controller, bool(wait))
            save_img(event, layout)
    finally:
        controller.stop()
def check_size(scene):
    """Return the number of grid positions reachable in ``scene``.

    Starts a controller, randomizes the scene, and runs a breadth-first search
    from the agent's start position using every action in
    ``ALL_POSSIBLE_ACTIONS`` on a 0.5 grid.

    This function used to open ``dumped/<scene>.hdf5`` in ``"w"`` mode without
    writing to or closing it, which truncated any existing dump for the scene.
    Counting positions needs no file, so that open was removed.

    Args:
        scene: Scene name passed to ``controller.reset``.

    Returns:
        The count of distinct ``(x, z)`` positions visited.
    """
    from collections import deque

    controller = ai2thor.controller.Controller()
    controller.start()
    try:
        controller.reset(scene)
        controller.random_initialize(unique_object_types=True)
        event = controller.step(dict(action='Initialize', gridSize=0.5))

        agent_pos = event.metadata['agent']['position']
        y_coord = agent_pos['y']
        start = (agent_pos['x'], agent_pos['z'])

        # Using BFS to discover all reachable positions in current environment.
        visited = {start}
        frontier = deque([start])
        while frontier:
            loc = frontier.popleft()
            for act in ALL_POSSIBLE_ACTIONS:
                # Return to the expanded cell before trying each action.
                controller.step(
                    dict(action='Teleport', x=loc[0], y=y_coord, z=loc[1]))
                event = controller.step(dict(action=act))
                if not event.metadata['lastActionSuccess']:
                    continue
                new_pos = event.metadata['agent']['position']
                new_loc = (new_pos['x'], new_pos['z'])
                if new_loc not in visited:
                    visited.add(new_loc)
                    frontier.append(new_loc)
    finally:
        controller.stop()

    return len(visited)
def dump_feature(scene, cat2idx):
    """Append a ``dump_features`` dataset to ``dumped/<scene>.hdf5``.

    For every state stored in the file's ``locations`` dataset the agent is
    teleported there, and a vector of length ``len(cat2idx) + 4`` is built:
    slot ``cat2idx[objectType]`` holds the ``distance`` of a visible object of
    that type, and the last four slots hold the matching row of ``lasers``.

    Args:
        scene: Scene name; also selects the HDF5 file.
        cat2idx: Mapping from object type name to feature index.

    Visible objects that cannot be stored (type missing from ``cat2idx``,
    index out of range, or missing key) have their type printed and are
    skipped.  The file and the controller are released even on error.
    """
    with h5py.File("dumped/{}.hdf5".format(scene), "a") as f:
        states = f['locations'][()]
        laser = f['lasers'][()]
        dump_features = []

        controller = ai2thor.controller.Controller()
        controller.start()
        try:
            controller.reset(scene)
            controller.step(
                dict(action='Initialize',
                     gridSize=0.5,
                     visibilityDistance=1000.0))

            for i, state in enumerate(states):
                event = controller.step(
                    dict(action='TeleportFull',
                         x=state[0],
                         y=1.25,
                         z=state[1],
                         rotation=state[2],
                         horizon=30))

                df = np.zeros(len(cat2idx) + 4)
                df[-4:] = laser[i].tolist()
                for obj in event.metadata['objects']:
                    if not obj['visible']:
                        continue
                    try:
                        df[cat2idx[obj['objectType']]] = obj['distance']
                    except (KeyError, IndexError):
                        # Best effort: report the type we could not store.
                        # Unlike a bare ``except``, this lets KeyboardInterrupt
                        # and unrelated errors propagate.
                        print(obj['objectType'])
                dump_features.append(df)
        finally:
            controller.stop()

        f.create_dataset("dump_features",
                         data=np.asarray(dump_features, np.float32))
def view_layout(layout, controller=None, wait=True):
    """Arrange ``layout`` in FloorPlan1 and return the resulting event.

    Args:
        layout: Layout that you would like to view.
        controller: Feed in a controller, or leave as None to have the
            function create one.  Providing a controller is useful if you are
            viewing multiple layouts consecutively and want only one Unity
            window.
        wait: Whether to wait for user input before closing the Unity window.

    Returns:
        The event returned by ``arrange(controller, layout)``.

    A controller created here is stopped before returning; a controller passed
    in by the caller is left running.
    """
    owns_controller = controller is None
    if owns_controller:
        controller = ai2thor.controller.Controller(quality='High')
        controller.start(player_screen_height=player_screen_height,
                         player_screen_width=player_screen_width)
    try:
        controller.reset('FloorPlan1')
        if owns_controller:
            # ``gridSize`` is the spelling used by the other Initialize calls
            # in this file; this call previously used lowercase ``gridsize``.
            controller.step(dict(action='Initialize', gridSize=0.25))

        # how to create/teleport an object
        # controller.step(
        #     dict(action='CreateObject', objectType='Tomato', randomizeObjectAppearance=False, objectVariation=1))
        # controller.step(dict(action='DropHandObject'))
        # controller.step(dict(action='TeleportObject', objectId='Tomato|1', x=-0.39, y=1.74, z=-0.81))

        event = arrange(controller, layout)
        if wait:
            input("press enter to close...")
        return event
    finally:
        if owns_controller:
            controller.stop()
def check_visible_objects_closed_receptacles(ctx, start_scene, end_scene):
    """Print openable receptacles inside which a placed object stays visible.

    For every floorplan number in ``range(int(start_scene), int(end_scene))``
    the BFS controller explores the scene, every pickupable object is picked
    up, and one probe object (the first Mug, CellPhone or SoapBar found) is
    placed into each visible, openable receptacle from every grid point,
    rotation and horizon.  A receptacle is reported when the ``Replace``
    succeeds and ``is_object_visible`` is still true for the probe.

    Args:
        ctx: Unused here.
        start_scene: First floorplan number (inclusive).
        end_scene: Last floorplan number (exclusive).

    Raises:
        Exception: If a scene contains no object of the probe types.
    """
    from itertools import product
    import ai2thor.controller

    bfs = ai2thor.controller.BFSController()
    bfs.local_executable_path = 'unity/builds/thor-local-OSXIntel64.app/Contents/MacOS/thor-local-OSXIntel64'
    bfs.start()

    for scene_number in range(int(start_scene), int(end_scene)):
        print("working on floorplan %s" % scene_number)
        bfs.search_all_closed('FloorPlan%s' % scene_number)

        probe_id = None
        probe_types = ['Mug', 'CellPhone', 'SoapBar']
        for candidate in bfs.last_event.metadata['objects']:
            if candidate['pickupable']:
                bfs.step(action=dict(action='PickupObject',
                                     objectId=candidate['objectId'],
                                     forceVisible=True))
            if probe_id is None and candidate['objectType'] in probe_types:
                probe_id = candidate['objectId']

        if probe_id is None:
            raise Exception("Couldn't get a visibility_object")

        flagged = set()
        for point in bfs.grid_points:
            bfs.step(dict(action='Teleport',
                          x=point['x'],
                          y=point['y'],
                          z=point['z']),
                     raise_for_failure=True)

            for rot, hor in product(bfs.rotations, bfs.horizons):
                event = bfs.step(dict(action='RotateLook',
                                      rotation=rot,
                                      horizon=hor),
                                 raise_for_failure=True)
                for receptacle in event.metadata['objects']:
                    if not (receptacle['receptacle'] and receptacle['visible']
                            and receptacle['openable']):
                        continue

                    bfs.step(action=dict(action='Replace',
                                         forceVisible=True,
                                         pivot=0,
                                         receptacleObjectId=receptacle['objectId'],
                                         objectId=probe_id))
                    if not bfs.last_event.metadata['lastActionSuccess']:
                        continue

                    if (bfs.is_object_visible(probe_id)
                            and receptacle['objectId'] not in flagged):
                        flagged.add(receptacle['objectId'])
                        print("Got bad receptacle: %s" % receptacle['objectId'])
                        # import cv2
                        # cv2.imshow('aoeu', controller.last_event.cv2image())
                        # cv2.waitKey(0)

                    # Take the probe back so it can be placed in the next
                    # receptacle.
                    bfs.step(action=dict(action='PickupObject',
                                         objectId=probe_id,
                                         forceVisible=True))
import ai2thor.controller import cv2 import keyboard import yolo def exportFrame(): event = controller.step(dict(action='Initialize', continuous=True)) cv2.imwrite('frame.jpg', event.cv2img) yolo.detect() if __name__ == "__main__": controller = ai2thor.controller.Controller() controller.start(player_screen_height=480, player_screen_width=480) controller.reset('FloorPlan28') while True: try: if keyboard.is_pressed('a'): event = controller.step(dict(action='MoveLeft')) exportFrame() elif keyboard.is_pressed('d'): event = controller.step(dict(action='MoveRight')) exportFrame() elif keyboard.is_pressed('w'): event = controller.step(dict(action='MoveAhead')) exportFrame() elif keyboard.is_pressed('s'): event = controller.step(dict(action='MoveBack')) exportFrame() elif keyboard.is_pressed('right arrow'):
#import pytest import os import ai2thor.controller def releases_dir(self): return os.path.normpath( os.path.join(os.path.abspath(__file__), "..", "..", "..", "unity", "builds")) controller = ai2thor.controller.Controller() controller.releases_dir = releases_dir.__get__(controller, ai2thor.controller.Controller) controller.start() controller.reset('FloorPlan28') controller.step(dict(action='Initialize', gridSize=0.25)) #@pytest.fixture #def controller(): # return c def assert_near(point1, point2): assert point1.keys() == point2.keys() for k in point1.keys(): assert round(point1[k], 3) == round(point2[k], 3) def test_lookdown():
def start_controller(args, ep1, obj_list, target_parents):
    """Replay a recorded episode and collect one annotated image per step.

    As read by this function, ``ep1[0]`` is the scene name, ``ep1[2]`` holds
    the start pose ``(x, y, z, rotation, horizon)``, and ``ep1[4:-1]`` are the
    action names to replay; ``ep1[-1]`` labels the final image.

    Args:
        args: Forwarded to ``img_bbx``.
        ep1: Episode record, indexed as described above.
        obj_list: Forwarded to ``img_bbx``.
        target_parents: Forwarded to ``img_bbx``.

    Returns:
        List of the images produced by ``img_bbx``, one per replayed action
        plus one for the final state.
    """
    env = ai2thor.controller.Controller()
    env.start(player_screen_height=500, player_screen_width=500)
    env.reset(ep1[0])
    event = env.step({
        'action': 'Initialize',
        'gridSize': 0.25,
        'fieldOfView': 90,
        'renderObjectImage': True,
    })
    event = env.step({
        'action': 'TeleportFull',
        'x': ep1[2][0],
        'y': ep1[2][1],
        'z': ep1[2][2],
        'rotation': ep1[2][3],
        'horizon': ep1[2][4],
    })

    def teleport(position, yaw, horizon):
        # Re-place the agent at ``position`` with the given yaw and horizon.
        return env.step({
            'action': 'TeleportFull',
            'x': position['x'],
            'y': position['y'],
            'z': position['z'],
            'rotation': yaw,
            'horizon': horizon,
        })

    rotation_list = ['RotateRight', 'RotateLeft']
    yaw_offsets = {'RotateLeft': -45, 'RotateRight': 45}
    horizon_steps = {'LookDown': 30, 'LookUp': -30}

    frames = []
    angle = 0
    for step_index in range(4, len(ep1) - 1):
        action = ep1[step_index]
        print(action)
        time.sleep(0.5)
        img = img_bbx(args, event, action, obj_list, ep1, target_parents)

        agent = event.metadata['agent']
        pos = agent['position']
        rot = agent['rotation']
        if action in yaw_offsets:
            # Rotations are replayed as 45-degree teleports.
            event = teleport(pos, rot['y'] + yaw_offsets[action], angle)
        elif action in horizon_steps:
            event = env.step({'action': action})
            angle = np.clip(angle + horizon_steps[action], -60, 60)
        else:
            # Re-assert the tracked horizon, then execute the action itself.
            event = teleport(pos, rot['y'], angle)
            event = env.step({'action': action})

        frames.append(img)
        time.sleep(1.5)

    img = img_bbx(args, event, ep1[-1], obj_list, ep1, target_parents)
    frames.append(img)
    time.sleep(3)
    env.stop()
    cv2.destroyAllWindows()
    return frames
from pynput.keyboard import Key, Listener
from pynput import keyboard
import json
import ai2thor.controller
import inquirer
from shapely.geometry import Point, Polygon

# Start a controller from a local Thor build instead of a downloaded release.
controller = ai2thor.controller.Controller()
# NOTE(review): machine-specific absolute path; must be changed on any other
# machine.
controller.local_executable_path = "/home/yiding/Documents/pr-as-planning-demo/thor/thor-linux/thor-local-Linux64"
event = controller.start(player_screen_width=1000, player_screen_height=666)
controller.step(dict(action='Initialize', gridSize=0.25, visibilityDistance=1))

# Module-level agent state.
rotation = 245
hand_empty = True
object_on_hand = {}

# Corner points of each room outline, used to build the shapely polygons below.
# NOTE(review): presumably (x, z) scene coordinates; confirm against the scene.
foyer_coords = [(2.041, 11.579), (2.041, 8.4475), (-4.2456, 8.4475),
                (-4.2456, 11.576)]
bedroom_coords = [(-4.3336, 11.105), (-4.3336, 7.305), (-9.7336, 7.305),
                  (-9.7336, 11.105)]
livingroom_coords = [(6.0463, 8.4976), (6.0463, 2.27), (-2.3615, 2.27),
                     (-2.3615, 8.4976)]
bathroom_coords = [(6.568, 1.198), (6.568, -0.055355), (4.9768, -0.055355),
                   (4.9768, -2.002), (2.368, -2.002), (2.368, 1.198)]
kitchen_coords = [(2.43, 2.5), (2.43, -2.9), (-2.4, -2.9), (-2.4, 2.5)]
shower_coords = [(6.568, -0.055355), (6.568, -2.002), (4.9768, -2.002),
                 (4.9768, -0.055355)]

foyer = Polygon(foyer_coords)
bedroom = Polygon(bedroom_coords)
livingroom = Polygon(livingroom_coords)
import ai2thor.controller import numpy as np import keyboard import time import random import cv2 import os player_size = 500 controller = ai2thor.controller.Controller() controller.start(player_screen_width=player_size * 1.5, player_screen_height=player_size) event = controller.step(dict(action='Initialize', gridSize=0.25, renderObjectImage = False)) def takePicture(event): set_confidence = 0.1 set_threshold = 0.3 # save image to disk cv2.imwrite("pic.png", event.cv2img) # load the COCO class labels labelsPath = 'yolo-object-detection/yolo-coco/coco.names' LABELS = open(labelsPath).read().strip().split("\n") # generate different colors for different classes np.random.seed(42) COLORS = np.random.randint(0, 255, size=(len(LABELS), 3), dtype="uint8") # derive the paths to the YOLO weights and model configuration weightsPath = 'yolo-object-detection/yolo-coco/yolov3.weights' configPath = 'yolo-object-detection/yolo-coco/yolov3.cfg' # read pre-trained model and config file
def check_visible_objects_closed_receptacles(ctx, start_scene, end_scene):
    """Report openable receptacles that fail to hide an object placed in them.

    Scenes ``FloorPlan<i>`` for ``i`` in ``range(int(start_scene),
    int(end_scene))`` are explored with a ``BFSController``.  All pickupable
    objects are picked up, and the first Mug, CellPhone or SoapBar found
    serves as the probe.  From every grid point, rotation and horizon the
    probe is placed into each visible, openable receptacle; when the placement
    succeeds and the probe is still visible, the receptacle id is printed
    (once per scene).

    Args:
        ctx: Unused here.
        start_scene: First floorplan number (inclusive).
        end_scene: Last floorplan number (exclusive).

    Raises:
        Exception: If no probe object exists in a scene.
    """
    from itertools import product

    import ai2thor.controller

    controller = ai2thor.controller.BFSController()
    controller.local_executable_path = (
        "unity/builds/thor-local-OSXIntel64.app/Contents/MacOS/thor-local-OSXIntel64"
    )
    controller.start()

    def force_pickup(object_id):
        # Pick up ``object_id`` regardless of its current visibility.
        controller.step(
            action={
                "action": "PickupObject",
                "objectId": object_id,
                "forceVisible": True,
            }
        )

    def place_inside(receptacle_id, object_id):
        # Put ``object_id`` into ``receptacle_id``; return the success flag.
        controller.step(
            action={
                "action": "Replace",
                "forceVisible": True,
                "pivot": 0,
                "receptacleObjectId": receptacle_id,
                "objectId": object_id,
            }
        )
        return controller.last_event.metadata["lastActionSuccess"]

    for scene_index in range(int(start_scene), int(end_scene)):
        print("working on floorplan %s" % scene_index)
        controller.search_all_closed("FloorPlan%s" % scene_index)

        probe_id = None
        probe_types = ["Mug", "CellPhone", "SoapBar"]
        for scene_object in controller.last_event.metadata["objects"]:
            if scene_object["pickupable"]:
                force_pickup(scene_object["objectId"])

            if probe_id is None and scene_object["objectType"] in probe_types:
                probe_id = scene_object["objectId"]

        if probe_id is None:
            raise Exception("Couldn't get a visibility_object")

        bad_receptacles = set()
        for point in controller.grid_points:
            teleport = {"action": "Teleport"}
            teleport.update(x=point["x"], y=point["y"], z=point["z"])
            controller.step(teleport, raise_for_failure=True)

            for rot, hor in product(controller.rotations, controller.horizons):
                look = {"action": "RotateLook", "rotation": rot, "horizon": hor}
                event = controller.step(look, raise_for_failure=True)

                openable_in_view = (
                    o
                    for o in event.metadata["objects"]
                    if o["receptacle"] and o["visible"] and o["openable"]
                )
                for receptacle in openable_in_view:
                    if place_inside(receptacle["objectId"], probe_id):
                        newly_bad = (
                            controller.is_object_visible(probe_id)
                            and receptacle["objectId"] not in bad_receptacles
                        )
                        if newly_bad:
                            bad_receptacles.add(receptacle["objectId"])
                            print("Got bad receptacle: %s" % receptacle["objectId"])
                            # import cv2
                            # cv2.imshow('aoeu', controller.last_event.cv2image())
                            # cv2.waitKey(0)

                        # Retrieve the probe for the next receptacle.
                        force_pickup(probe_id)
def on_release(key):
    """Return False when Escape is released, otherwise None.

    pynput stops a listener whose callback returns False.
    """
    escape_released = key == keyboard.Key.esc
    if not escape_released:
        return None
    return False


def draw():
    """Draw the agent on the top view and show the frame with matplotlib."""
    agent = controller.last_event.metadata["agent"]
    position = test.position_to_tuple(agent["position"])
    yaw = agent["rotation"]["y"]
    new_frame = test.draw(position, yaw, top_view["frame"],
                          top_view["pos_translator"], controller)
    plt.imshow(new_frame)
    plt.show()
    plt.pause(0.001)


def object_detection(event):
    """Write the event's frame to ``image.jpg`` and run the detector on it."""
    cv2.imwrite("image.jpg", event.cv2img)
    detection.detect("image.jpg")


if __name__ == "__main__":
    controller = ai2thor.controller.Controller()
    controller.start(player_screen_width=1000, player_screen_height=700)
    controller.reset('FloorPlan24')
    controller.step({'action': 'Initialize', 'gridSize': 0.25})

    # Block until a key callback stops the listener.
    with keyboard.Listener(on_press=on_press,
                           on_release=on_release) as listener:
        listener.join()
# store the result back to the event bbox_frame = np.array(img) event.bbox_3d_frame = bbox_frame return event if __name__ == "__main__": # give the height and width of the 2D image and scene id w, h = 900, 900 scene = "FloorPlan2{:02d}_physics".format(1) # allocate controller and initialize the scene and agent # local_path = "src/ai2thor/unity/builds/thor-local-OSXIntel64.app/Contents/MacOS/AI2-Thor" local_path = "" controller = ai2thor.controller.Controller(local_path=local_path) _ = controller.start(width=w, height=h) _ = controller.reset(scene) event = controller.step( dict(action='Initialize', gridSize=0.25, renderClassImage=True, renderObjectImage=True, renderDepthImage=True, fieldOfView=90)) # do something then draw the 3D bbox in 2D image event = controller.step(dict(action="MoveAhead")) event = controller.step(dict(action="MoveAhead")) event = controller.step( dict(action="Rotate", rotation=dict(x=0, y=30, z=0))) event = draw_3d_bbox(event)
def save_topview_image(args):
    """Start a 300x300 controller and drive the agent from the keyboard.

    NOTE(review): the source of this block was whitespace-collapsed.  The
    nesting below (helpers defined inside this function, listener started at
    the end) is a reconstruction; confirm against the original file.  The
    callbacks declare ``global`` names (``x``, ``y``, ``image``, colours,
    ``horizon``, ``event``) that must be defined at module level elsewhere.
    """
    controller = ai2thor.controller.Controller(quality='High', fullscreen=False)
    controller.start(player_screen_width=300, player_screen_height=300)
    # NOTE(review): the attribute really is spelled ``sence``; it must match
    # the argument parser defined elsewhere.
    scene = args.sence
    #scene = random.randint(1, 30)
    controller.reset('FloorPlan' + str(scene))
    visibilityDistance = 10
    renderObjectImage = True
    event = controller.step(
        dict(action='Initialize',
             gridSize=0.25,
             cameraY=0.6,
             renderObjectImage=renderObjectImage,
             visibilityDistance=visibilityDistance))

    def draw_box(event):
        """Return the current frame with a labelled box around visible objects.

        The frame is round-tripped through ``test1.png``.  CounterTop and
        Window objects are not drawn.
        """
        r = 255
        g = 0
        b = 0
        cv2.imwrite('test1.png', event.cv2img)
        img = cv2.imread("test1.png", 3)
        for o in event.metadata['objects']:
            if o['visible'] == True and o['objectType'] != 'CounterTop' and o[
                    'objectType'] != 'Window':
                # ``a`` is indexed as two corner points: (a[0], a[1]) and
                # (a[2], a[3]).
                a = event.instance_detections2D[o['objectId']]
                cv2.rectangle(img, (a[0], a[1]), (a[2], a[3]), (r, g, b), 1)
                cv2.putText(img, o['objectType'], (a[0], a[1] + 5),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.3, (255, 255, 0), 1)
        return img

    def display_image(img):
        """Show ``img`` in an OpenCV window and block until a key is pressed."""
        cv2.imshow("image", img)
        #cv2.namedWindow("image",cv2.WND_PROP_FULLSCREEN)
        #cv2.setWindowProperty("image", cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
        # NOTE(review): placement of this print (inside this helper vs. the
        # enclosing scope) is reconstructed; confirm.
        print(x, y)

    def on_press(key):
        """Handle a key press: move the agent and mark its trail on ``image``.

        Character keys: w/s/a/d move the agent; on success at a heading of
        exactly 0/90/180/270 the marker position ``(x, y)`` shifts by 12 px
        horizontally or 17 px vertically and the trail colour advances.
        ``p`` shows detection boxes, ``c`` redraws the top view, ``f`` calls
        ``takePicture``.  Arrow keys change the horizon or rotate the agent.
        """
        global rotation, horizon, event, x, y, image, r1, g1, b1, r2, g2, b2
        print(x, y)
        try:
            if key.char == 'w':
                event = controller.step(dict(action='MoveAhead'))
                draw_topview2(controller)
                print(event.metadata['lastActionSuccess'])
                if event.metadata['lastActionSuccess'] == True and (
                        event.metadata['agent']['rotation']['y']) == 90.0:
                    x += 12
                    # Advance the forward-trail colour: raise green first,
                    # then cycle blue.
                    if g1 < 255:
                        g1 += 30
                    else:
                        b1 += 30
                        if b1 > 255:
                            b1 = 0
                elif event.metadata['lastActionSuccess'] == True and (
                        event.metadata['agent']['rotation']['y']) == 180.0:
                    y += 17
                    if g1 < 255:
                        g1 += 30
                    else:
                        b1 += 30
                        if b1 > 255:
                            b1 = 0
                elif event.metadata['lastActionSuccess'] == True and (
                        event.metadata['agent']['rotation']['y']) == 270.0:
                    x -= 12
                    if g1 < 255:
                        g1 += 30
                    else:
                        b1 += 30
                        if b1 > 255:
                            b1 = 0
                elif event.metadata['lastActionSuccess'] == True and (
                        event.metadata['agent']['rotation']['y']) == 0.0:
                    y -= 17
                    if g1 < 255:
                        g1 += 30
                    else:
                        b1 += 30
                        if b1 > 255:
                            b1 = 0
                cv2.circle(image, (x, y), 3, (r1, g1, b1), -1)
                display_image(image)
            elif key.char == 's':
                event = controller.step(dict(action='MoveBack'))
                if event.metadata['lastActionSuccess'] == True and (
                        event.metadata['agent']['rotation']['y']) == 90.0:
                    x -= 12
                    # The second colour (r2, g2, b2) is shared by every
                    # non-forward move.
                    if g2 < 255:
                        g2 += 30
                    else:
                        b2 += 30
                        if b2 > 255:
                            b2 = 0
                elif event.metadata['lastActionSuccess'] == True and (
                        event.metadata['agent']['rotation']['y']) == 180.0:
                    y -= 17
                    if g2 < 255:
                        g2 += 30
                    else:
                        b2 += 30
                        if b2 > 255:
                            b2 = 0
                elif event.metadata['lastActionSuccess'] == True and (
                        event.metadata['agent']['rotation']['y']) == 270.0:
                    x += 12
                    if g2 < 255:
                        g2 += 30
                    else:
                        b2 += 30
                        if b2 > 255:
                            b2 = 0
                elif event.metadata['lastActionSuccess'] == True and (
                        event.metadata['agent']['rotation']['y']) == 0.0:
                    y += 17
                    if g2 < 255:
                        g2 += 30
                    else:
                        b2 += 30
                        if b2 > 255:
                            b2 = 0
                cv2.circle(image, (x, y), 3, (r2, g2, b2), -1)
                display_image(image)
            elif key.char == 'd':
                event = controller.step(dict(action='MoveRight'))
                if event.metadata['lastActionSuccess'] == True and (
                        event.metadata['agent']['rotation']['y']) == 90.0:
                    y += 17
                    if g2 < 255:
                        g2 += 30
                    else:
                        b2 += 30
                        if b2 > 255:
                            b2 = 0
                elif event.metadata['lastActionSuccess'] == True and (
                        event.metadata['agent']['rotation']['y']) == 180.0:
                    x -= 12
                    if g2 < 255:
                        g2 += 30
                    else:
                        b2 += 30
                        if b2 > 255:
                            b2 = 0
                elif event.metadata['lastActionSuccess'] == True and (
                        event.metadata['agent']['rotation']['y']) == 270.0:
                    y -= 17
                    if g2 < 255:
                        g2 += 30
                    else:
                        b2 += 30
                        if b2 > 255:
                            b2 = 0
                elif event.metadata['lastActionSuccess'] == True and (
                        event.metadata['agent']['rotation']['y']) == 0.0:
                    x += 12
                    if g2 < 255:
                        g2 += 30
                    else:
                        b2 += 30
                        if b2 > 255:
                            b2 = 0
                cv2.circle(image, (x, y), 3, (r2, g2, b2), -1)
                display_image(image)
            elif key.char == 'a':
                event = controller.step(dict(action='MoveLeft'))
                if event.metadata['lastActionSuccess'] == True and (
                        event.metadata['agent']['rotation']['y']) == 90.0:
                    y -= 17
                    if g2 < 255:
                        g2 += 30
                    else:
                        b2 += 30
                        if b2 > 255:
                            b2 = 0
                elif event.metadata['lastActionSuccess'] == True and (
                        event.metadata['agent']['rotation']['y']) == 180.0:
                    x += 12
                    if g2 < 255:
                        g2 += 30
                    else:
                        b2 += 30
                        if b2 > 255:
                            b2 = 0
                elif event.metadata['lastActionSuccess'] == True and (
                        event.metadata['agent']['rotation']['y']) == 270.0:
                    y += 17
                    if g2 < 255:
                        g2 += 30
                    else:
                        b2 += 30
                        if b2 > 255:
                            b2 = 0
                elif event.metadata['lastActionSuccess'] == True and (
                        event.metadata['agent']['rotation']['y']) == 0.0:
                    x -= 12
                    if g2 < 255:
                        g2 += 30
                    else:
                        b2 += 30
                        if b2 > 255:
                            b2 = 0
                cv2.circle(image, (x, y), 3, (r2, g2, b2), -1)
                display_image(image)
            elif key.char == 'p':
                display_image(draw_box(event))
            elif key.char == 'c':
                draw_topview2(controller)
                image = cv2.imread("topview2.png")
                display_image(image)
            elif key.char == 'f':
                takePicture(event)
        # NOTE(review): this bare except is how special keys (which lack
        # ``.char``) reach the branch below, but it also hides any error
        # raised by the character-key handling above.
        except:
            if key == keyboard.Key.up:
                horizon -= 10
                event = controller.step(dict(action='Look', horizon=horizon))
            elif key == keyboard.Key.down:
                horizon += 10
                event = controller.step(dict(action='Look', horizon=horizon))
            elif key == keyboard.Key.right:
                event = controller.step(dict(action='RotateRight'))
                display_image(image)
            elif key == keyboard.Key.left:
                event = controller.step(dict(action='RotateLeft'))
                display_image(image)

    def on_release(key):
        """Return False when Escape is released (pynput then stops listening)."""
        if key == keyboard.Key.esc:
            return False

    def draw_topview1(controller):
        """Render the top view with the agent marker and save ``topview.png``."""
        t = get_agent_map_data(controller)
        new_frame = add_agent_view_triangle(
            position_to_tuple(
                controller.last_event.metadata["agent"]["position"]),
            controller.last_event.metadata["agent"]["rotation"]["y"],
            t["frame"],
            t["pos_translator"],
        )
        plt.imshow(new_frame)
        plt.axis('off')
        plt.savefig('topview.png')
        # Re-open the saved figure and pass it through ``trim`` before
        # overwriting the file.
        im = Image.open('topview.png')
        im = trim(im)
        im.save("topview.png")

    def draw_topview2(controller):
        """Same as ``draw_topview1`` but writes ``topview2.png``."""
        t = get_agent_map_data(controller)
        new_frame = add_agent_view_triangle(
            position_to_tuple(
                controller.last_event.metadata["agent"]["position"]),
            controller.last_event.metadata["agent"]["rotation"]["y"],
            t["frame"],
            t["pos_translator"],
        )
        plt.imshow(new_frame)
        plt.axis('off')
        plt.savefig('topview2.png')
        im = Image.open('topview2.png')
        im = trim(im)
        im.save("topview2.png")

    # Block here until a callback stops the listener.
    with keyboard.Listener(on_press=on_press,
                           on_release=on_release) as listener:
        listener.join()
def dump(scene, angle, resolution=(300, 300)):
    '''
    Dump needed data to hdf5 file to speed up training. Dumped file can be loaded using: f = h5py.File(filename, 'r'), where:
    - f['locations'][()]: numpy array of all states in format (x, z, rotation)
    - f['observations'][()]: numpy array of RGB images of corresponding states in f['locations']
    - f['graph'][()]: numpy array representing transition graph between states. e.g: f['graph'][0] = array([ 16, 272, 1, -1])
        means from the 1st location, the agent will reach the 16th state by taking action 0 (move forward), the 272nd state by taking action 1 (move backward),
        reach the 1st state by taking action 2 (rotate right) and cannot take action 3 (rotate left), indicated by the -1 value.
    - f['visible_objects'][()]: visible objects at corresponding states in f['locations'] (comma-joined object types, "" when none)
    - f['all_visible_objects'][()]: sorted list of every object type seen, excluding 'Lamp', 'PaperTowelRoll' and 'Glassbottle'
    - f['shortest'][()]: numpy array with shape of (num_states, num_states) indicating the shortest path length between every pair of states.
    - f['lasers'][()]: four step counts per state, derived from the transition graph (see "Calculate laser" below)

    Parameters:
        scene: scene name; also selects the output file dumped/<scene>.hdf5 (opened in "w" mode, so an existing file is overwritten)
        angle: rotation step in degrees; every resulting rotation must be a key of the `movement` table below
        resolution: size passed to cv2.resize for each stored observation

    Returns the agent's initial y coordinate.

    NOTE(review): the file handle and the controller are not released if an exception is raised part-way through.
    '''
    f = h5py.File("dumped/{}.hdf5".format(scene), "w")
    observations = []
    dump_features = []  # never used in this function
    locations = []
    visible_objects = []
    controller = ai2thor.controller.Controller()
    controller.start()
    controller.reset(scene)
    event = controller.step(
        dict(action='Initialize',
             gridSize=0.5,
             cameraY=1.0,
             visibilityDistance=1.0))
    y_coord = event.metadata['agent']['position']['y']
    locations.append((event.metadata['agent']['position']['x'],
                      event.metadata['agent']['position']['z']))

    # Using BFS to discover all reachable positions in current environment.
    visited = set()
    visited.add((event.metadata['agent']['position']['x'],
                 event.metadata['agent']['position']['z']))
    while len(locations) > 0:
        loc = locations.pop(0)
        for act in ALL_POSSIBLE_ACTIONS:
            # Go back to the cell being expanded before trying each action.
            controller.step(
                dict(action='Teleport', x=loc[0], y=y_coord, z=loc[1]))
            event = controller.step(dict(action=act))
            if event.metadata['lastActionSuccess']:
                new_loc = (event.metadata['agent']['position']['x'],
                           event.metadata['agent']['position']['z'])
                if new_loc not in visited:
                    visited.add(new_loc)
                    locations.append(new_loc)
    all_locs = list(visited)
    print("{} locations".format(len(all_locs)))

    states = []
    # Evenly spaced headings in [0, 360), starting at 0.
    rotations = np.linspace(0, 360, int(360 // angle) + 1)[:-1].tolist()
    # Heading -> [dz, dx] unit step for "move ahead": the graph code below adds
    # move[1] * 0.5 to x and move[0] * 0.5 to z (0.5 is the gridSize above).
    movement = {
        0: [1, 0],
        90.0: [0, 1],
        180.0: [-1, 0],
        270.0: [0, -1],
        22.5: [1, 0],
        67.5: [0, 1],
        45.0: [1, 1],
        112.5: [0, 1],
        135.0: [-1, 1],
        157.5: [-1, 0],
        202.5: [-1, 0],
        225.0: [-1, -1],
        247.5: [0, -1],
        292.5: [0, -1],
        315: [1, -1],
        337.5: [1, 0]
    }

    # Adding rotations and looking angles
    for loc in all_locs:
        for rot in rotations:
            states.append((loc[0], loc[1], rot))
            # for horot in [-30, 0, 30, 60]:
            #     states.append((loc[0], loc[1], rot, horot))

    # ------------------------------------------------------------------------------
    ## Calculate shortest path length array
    sta2idx = dict(zip(states, range(len(states))))
    loc2idx = dict(zip(all_locs, range(len(all_locs))))

    shortest_loc = np.zeros((len(all_locs), len(all_locs)))
    for i in range(len(all_locs)):
        dists = cal_min_dist(i, all_locs, loc2idx)
        for j, d in enumerate(dists):
            if j != i:
                shortest_loc[i, j] = d
                shortest_loc[j, i] = d

    # A state pair inherits the distance between its two locations; rotation
    # is ignored.
    shortest_state = np.zeros((len(states), len(states)))
    for i in range(len(states)):
        for j in range(len(states)):
            if i != j:
                from_loc = loc2idx[states[i][0], states[i][1]]
                to_loc = loc2idx[states[j][0], states[j][1]]
                shortest_state[i, j] = shortest_loc[from_loc, to_loc]
                shortest_state[j, i] = shortest_state[i, j]

    # ------------------------------------------------------------------------------
    # Building transition graph
    graph = np.zeros(shape=(len(states), 4), dtype=int)
    for state in states:
        loc = (state[0], state[1])
        rot = state[2]
        to_states = []
        # Raises KeyError for a rotation missing from the table above.
        move = movement[rot]
        to_states.append((loc[0] + move[1] * 0.5, loc[1] + move[0] * 0.5,
                          rot))  # move ahead
        to_states.append((loc[0] - move[1] * 0.5, loc[1] - move[0] * 0.5,
                          rot))  # move back
        to_states.append((loc[0], loc[1],
                          rot + angle if rot + angle < 360 else 0))  # turn right
        to_states.append((loc[0], loc[1], rot - angle
                          if rot - angle >= 0 else 360 - angle))  # turn left
        # to_states.append((loc[0], loc[1], rot + 30)) # look down
        # to_states.append((loc[0], loc[1], rot, horot - 30)) # look up

        state_idx = sta2idx[state]
        for i, new_state in enumerate(to_states):
            # A target that is not a known state means the action is blocked.
            if new_state in sta2idx:
                graph[state_idx][i] = sta2idx[new_state]
            else:
                graph[state_idx][i] = -1

    # ------------------------------------------------------------------------------
    laser = {}
    ## Calculate laser
    # For each location, count how many consecutive ahead (action 0) / back
    # (action 1) transitions exist at headings 0 and 90.  The (loc, 90) lookup
    # raises KeyError unless 90 is one of `rotations`.
    for loc in all_locs:
        pos = (loc[0], loc[1], 0)
        north = 0
        while graph[sta2idx[pos]][0] != -1:
            north += 1
            pos = states[graph[sta2idx[pos]][0]]
            assert pos[2] == 0

        pos = (loc[0], loc[1], 0)
        south = 0
        while graph[sta2idx[pos]][1] != -1:
            south += 1
            pos = states[graph[sta2idx[pos]][1]]
            assert pos[2] == 0

        pos = (loc[0], loc[1], 90)
        right = 0
        while graph[sta2idx[pos]][0] != -1:
            right += 1
            pos = states[graph[sta2idx[pos]][0]]
            assert pos[2] == 90

        pos = (loc[0], loc[1], 90)
        left = 0
        while graph[sta2idx[pos]][1] != -1:
            left += 1
            pos = states[graph[sta2idx[pos]][1]]
            assert pos[2] == 90

        # Store the four counts reordered by heading quadrant.  The
        # comparisons are strict, so headings of exactly 45/135/225/315 are
        # not assigned here; they are filled by the diagonal pass below.
        for r in rotations:
            if r > 315.0 or r < 45.0:
                laser[(loc[0], loc[1], r)] = [north, south, right, left]
            elif r > 45.0 and r < 135.0:
                laser[(loc[0], loc[1], r)] = [right, left, south, north]
            elif r > 135.0 and r < 225.0:
                laser[(loc[0], loc[1], r)] = [south, north, left, right]
            elif r > 225.0 and r < 315.0:
                laser[(loc[0], loc[1], r)] = [left, right, north, south]

    # Diagonal pass: same counting along the 45 and 135 degree headings.
    if 45.0 in rotations:
        for loc in all_locs:
            pos = (loc[0], loc[1], 45.0)
            north = 0
            while graph[sta2idx[pos]][0] != -1:
                north += 1
                pos = states[graph[sta2idx[pos]][0]]
                assert pos[2] == 45.0

            pos = (loc[0], loc[1], 45.0)
            south = 0
            while graph[sta2idx[pos]][1] != -1:
                south += 1
                pos = states[graph[sta2idx[pos]][1]]
                assert pos[2] == 45.0

            pos = (loc[0], loc[1], 135.0)
            right = 0
            while graph[sta2idx[pos]][0] != -1:
                right += 1
                pos = states[graph[sta2idx[pos]][0]]
                assert pos[2] == 135.0

            pos = (loc[0], loc[1], 135.0)
            left = 0
            while graph[sta2idx[pos]][1] != -1:
                left += 1
                pos = states[graph[sta2idx[pos]][1]]
                assert pos[2] == 135.0

            laser[(loc[0], loc[1], 45.0)] = [north, south, right, left]
            laser[(loc[0], loc[1], 225.0)] = [south, north, left, right]
            laser[(loc[0], loc[1], 135.0)] = [right, left, south, north]
            laser[(loc[0], loc[1], 315.0)] = [left, right, north, south]

    # Flatten to a list aligned with `states`.
    lasers = []
    for state in states:
        lasers.append(laser[state])

    # ------------------------------------------------------------------------------
    # Adding observations
    for state in states:
        vis_objects = set()
        event = controller.step(
            dict(action='TeleportFull',
                 x=state[0],
                 y=1.25,
                 z=state[1],
                 rotation=state[2],
                 horizon=30))

        resized_frame = cv2.resize(event.frame,
                                   (resolution[0], resolution[1]))
        observations.append(resized_frame)

        visible = [obj for obj in event.metadata['objects'] if obj['visible']]
        for obj in visible:
            vis_objects.add(obj['objectType'])

        if len(vis_objects) > 0:
            visible_objects.append(",".join(list(vis_objects)))
        else:
            visible_objects.append("")

    # ------------------------------------------------------------------------------
    print("{} states".format(len(states)))

    # Union of all object types seen in any state, minus the excluded types.
    all_visible_objects = list(
        set(",".join([o for o in visible_objects if o != '']).split(',')))
    all_visible_objects.sort()
    for c in ['Lamp', 'PaperTowelRoll', 'Glassbottle']:
        if c in all_visible_objects:
            all_visible_objects.remove(c)

    controller.stop()

    f.create_dataset("locations", data=np.asarray(states, np.float32))
    f.create_dataset("observations", data=np.asarray(observations, np.uint8))
    f.create_dataset("graph", data=graph)
    f.create_dataset("visible_objects",
                     data=np.array(visible_objects, dtype=object),
                     dtype=h5py.special_dtype(vlen=str))
    f.create_dataset("all_visible_objects",
                     data=np.array(all_visible_objects, dtype=object),
                     dtype=h5py.special_dtype(vlen=str))
    f.create_dataset("shortest", data=shortest_state)
    f.create_dataset("lasers", data=np.asarray(lasers, np.float32))
    f.close()
    return y_coord
import ai2thor.controller import keyboard import time import voicecontrol if __name__ == "__main__": #Start up ai2thor and initialize floor controller = ai2thor.controller.Controller() controller.start(player_screen_height=666, player_screen_width=666) controller.reset('FloorPlan3') ##FloorPlan: 0->30 ; 201->230 ; 301->330 ; 401->430 controller.step(dict(action='Initialize', gridSize=0.5)) print('Initialized') #Control while True: if keyboard.is_pressed('a'): event = controller.step(dict(action='MoveLeft')) time.sleep(0.3) elif keyboard.is_pressed('d'): event = controller.step(dict(action='MoveRight')) time.sleep(0.3) elif keyboard.is_pressed('w'): event = controller.step(dict(action='MoveAhead')) time.sleep(0.3) elif keyboard.is_pressed('s'): event = controller.step(dict(action='MoveBack')) time.sleep(0.3) elif keyboard.is_pressed('up arrow'): event = controller.step(dict(action='LookUp')) time.sleep(0.3)
{ "id": "unity", "port": 8200, "controller": ai2thor.controller.Controller() }, { "id": "robot", "port": 9200, "controller": ai2thor.robot_controller.Controller() } # {'id': 'robot', 'port': 9000, 'controller': ai2thor.robot_controller.Controller()} ] for run_config in runs: port = run_config["port"] controller = run_config["controller"] event = controller.start(start_unity=False, host="127.0.0.1", port=port) # event = controller.step({'action': 'ChangeQuality', 'quality': 'High'}) # event = controller.step({"action": "ChangeResolution", "x": 300, "y": 300}) for fp in fps: print(fp) for i in range(1): event = controller.reset(fp) # event = controller.step(dict(action='Initialize', gridSize=0.25, fieldOfView=90, renderInstanceSegmentation=True)) # event = controller.step(dict(action='InitialRandomSpawn', forceVisible=True, maxNumRepeats=10, randomSeed=1)) # event = controller.step(dict(action='MoveAhead', noise=0.02)) event = controller.step(dict(action="RotateLeft")) print("event for '{}':".format(run_config["id"])) pprint(event.metadata) time.sleep(1)