import shlex
import subprocess

from gym_ai2thor.envs.mcs_env import McsEnv
# McsNavWrapper is assumed to be importable from the project's navigation
# wrapper module; its import is not shown in this fragment.


def check_gpu_usage_and_restart_env(env, nav_env):
    # Query free GPU memory via nvidia-smi and restart the MCS environment
    # (ending the current scene first) when fewer than 256 MB remain.
    command = "nvidia-smi --query-gpu=memory.free --format=csv"
    out = subprocess.check_output(shlex.split(command)).decode("utf-8").split("\n")
    mb_remain = int(out[1].split()[0])
    if mb_remain < 256:
        env.controller.end_scene(None, None)
        new_env = McsEnv()
        new_nav_env = McsNavWrapper(new_env)
    else:
        new_env = env
        new_nav_env = nav_env
    return new_env, new_nav_env
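# For reference (an assumption about nvidia-smi's CSV output, not code from this
# repo): the query above returns a header row followed by one row per GPU, e.g.
#
#   memory.free [MiB]
#   10240 MiB
#
# so out[1].split()[0] is the free memory of GPU 0 as a string ("10240"). Note
# that only the first GPU is checked.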
def train(rank, args, shared_model, counter, lock, optimizer):
    torch.manual_seed(args.seed + rank)
    env = McsEnv(seed=args.seed + rank, task="interaction_scenes", scene_type="traversal")
    nav_env, navigator, model, _, _ = get_model_from_task(env, args.task)
    nav_env.reset(random_init=True)
    set_object_goal(navigator, env.scene_config)
    model = model.to(args.device)
    model.train()

    state = navigator.get_observation(nav_env.step_output)
    done = True

    # Monitoring
    total_reward_for_num_steps_list = []
    episode_total_rewards_list = []
    avg_reward_for_num_steps_list = []
    total_length = 0
    episode_length = 0
    n_episode = 0
    all_rewards_in_episode = []

    while True:
        # Sync with the shared model
        model.load_state_dict(shared_model.state_dict())
        done_mask = torch.zeros(size=(1, 1)).to(args.device)
        undone_mask = torch.ones(size=(1, 1)).to(args.device)
        if done:
            # Fresh episode: reset the recurrent state and previous action.
            rnn_hidden_states = torch.zeros(
                size=(model.net.num_recurrent_layers, 1, 512)).to(args.device)
            prev_action = torch.zeros(1, 1).to(args.device)
            mask = done_mask
        else:
            rnn_hidden_states = rnn_hidden_states.detach()

        values = []
        log_probs = []
        rewards = []
        entropies = []

        for step in range(args.num_steps):
            episode_length += 1
            total_length += 1
            batch = batch_obs(state, args.device)
            value, action, action_log_probs, rnn_hidden_states = model.act(
                batch, rnn_hidden_states, prev_action, mask)
            # torch.cuda.empty_cache()
            prev_action.copy_(action)
            mask = undone_mask
            entropies.append(-action_log_probs * torch.exp(action_log_probs))
            log_probs.append(action_log_probs)

            action_int = action.cpu().numpy()[0][0].item()
            reward, done = navigator.navigation_step_with_reward(
                nav_env, action_int, episode_length >= args.max_episode_length)
            state = navigator.get_observation(nav_env.step_output)

            values.append(value)
            rewards.append(reward)
            all_rewards_in_episode.append(reward)

            with lock:
                counter.value += 1

            if done:
                total_length -= 1
                total_reward_for_episode = sum(all_rewards_in_episode)
                episode_total_rewards_list.append(total_reward_for_episode)
                all_rewards_in_episode = []
                # Success is signalled by the terminal reward of 9.99.
                episode_success = (reward == 9.99)
                print('Process {} Episode {} Over with Length: {} and Reward: {: .3f}, '
                      'Success: {}. Total Trained Length: {}'.format(
                          rank, n_episode, episode_length, total_reward_for_episode,
                          episode_success, total_length))
                # if args.device != "cpu":
                #     env, nav_env = check_gpu_usage_and_restart_env(env, nav_env)
                if episode_success:
                    nav_env.reset(random_init=True)
                else:
                    nav_env.reset(random_init=True, repeat_current=False)
                set_object_goal(navigator, env.scene_config)
                state = navigator.get_observation(nav_env.step_output)
                sys.stdout.flush()
                episode_length = 0
                n_episode += 1
                break

        total_reward_for_num_steps = sum(rewards)
        total_reward_for_num_steps_list.append(total_reward_for_num_steps)
        avg_reward_for_num_steps = total_reward_for_num_steps / len(rewards)
        avg_reward_for_num_steps_list.append(avg_reward_for_num_steps)

        # Backprop and optimisation
        R = torch.zeros(1, 1).to(args.device)
        gae = torch.zeros(1, 1).to(args.device)
        batch = batch_obs(state, args.device)
        if not done:
            # Bootstrap: use the critic's estimate of the last state's value
            # as the return beyond this rollout.
            value, _, _, _ = model.act(batch, rnn_hidden_states, prev_action, mask)
            R = value.detach()
        values.append(R)

        policy_loss = 0
        value_loss = 0
        # import pdb; pdb.set_trace()  # good place to break to inspect a training cycle
        for i in reversed(range(len(rewards))):
            R = args.gamma * R + rewards[i]
            advantage = R - values[i]
            value_loss = value_loss + 0.5 * advantage.pow(2)

            # Generalized Advantage Estimation
            delta_t = rewards[i] + args.gamma * values[i + 1] - values[i]
            gae = gae * args.gamma * args.tau + delta_t
            policy_loss = policy_loss - log_probs[i] * gae.detach() - \
                args.entropy_coef * entropies[i]

        optimizer.zero_grad()
        (policy_loss + args.value_loss_coef * value_loss).backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)
        ensure_shared_grads(model, shared_model)
        optimizer.step()
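# A minimal sanity-check sketch of the GAE recursion above (not part of the
# original file). Names and defaults are assumptions: gamma/tau stand in for
# args.gamma/args.tau, and plain floats replace the 1x1 tensors used in train().
def gae_advantages(rewards, values, gamma=0.99, tau=1.0):
    # values[i] is V(s_i); values[-1] is the bootstrapped value of the state
    # after the last step, matching values.append(R) in train().
    assert len(values) == len(rewards) + 1
    advantages = [0.0] * len(rewards)
    gae = 0.0
    for i in reversed(range(len(rewards))):
        delta_t = rewards[i] + gamma * values[i + 1] - values[i]  # TD residual
        gae = gae * gamma * tau + delta_t
        advantages[i] = gae
    return advantages

# With tau=1, GAE reduces to the discounted return minus the baseline, e.g.
# gae_advantages([1.0, 0.0], [0.5, 0.4, 0.3], gamma=0.9) returns approximately
# [0.743, -0.13], and 0.743 == (1.0 + 0.9*0.0 + 0.81*0.3) - 0.5.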
from gym_ai2thor.envs.mcs_env import McsEnv
from meta_ontroller.meta_controller import MetaController
import sys

if __name__ == "__main__":
    env = McsEnv(task="interaction_scenes", scene_type="transferral", start_scene_number=2)
    metaController = MetaController(env)
    while env.current_scene < len(env.all_scenes) - 1:
        env.reset()
        result = metaController.excecute()
        sys.stdout.flush()
from gym_ai2thor.envs.mcs_env import McsEnv
from int_phy.scene_state import SceneState
import matplotlib.pyplot as plt

scene_name = "object_permanence"
start_scene_number = 3
env = McsEnv(task="intphys_scenes", scene_type=scene_name,
             start_scene_number=start_scene_number)

object_states = []
for _ in range(1):
    env.reset(random_init=False)
    # print(env.current_scene, env.scene_config['answer'],
    #       len(env.scene_config['goal']['action_list']))
    scene_state = None
    for i, x in enumerate(env.scene_config['goal']['action_list']):
        # print(i)
        # Build the scene state on the first frame, then update it per action.
        if i == 0:
            scene_state = SceneState(env.step_output)
        else:
            scene_state.update(env.step_output)
        env.step(action=x[0])
    object_states.append(scene_state)
env.controller.end_scene(None, None)

for i, scene_state in enumerate(object_states):
    plt.figure()
    for j, (id, obj_state) in enumerate(scene_state.object_state_dict.items()):
        v_xs = [v[0] for v in obj_state.velocity_history]
SHAPE_TYPES = ["cylinder", "sphere", "cube"] def set_scale(config, scale): config['objects'][0]['shows'][0]['scale']['x'] = scale config['objects'][0]['shows'][0]['scale']['y'] = scale config['objects'][0]['shows'][0]['scale']['z'] = scale if __name__ == "__main__": scene_name = "github_scenes" + "/collect_object_shape_data" for _, shape_type in enumerate(SHAPE_TYPES): print("Scene: {}".format(shape_type)) os.makedirs(os.path.join("appearance", "object_mask_frame", shape_type), exist_ok=True) env = McsEnv(task="intphys_scenes", scene_type=scene_name) env.reset(random_init=False) env.scene_config['objects'][0]['type'] = shape_type env.step_output = env.controller.start_scene(env.scene_config) object_frames = [] for scale in [0.2 + 0.1*i for i in range(10)]: set_scale(env.scene_config, scale) env.step_output = env.controller.start_scene(env.scene_config) for i, action in enumerate(env.scene_config['goal']['action_list']): env.step(action=action[0]) assert len(env.step_output.object_list) <= 1 if len(env.step_output.object_list) == 1: obj_state = ObjectState( env.step_output.object_list[0], env.step_output.depth_mask_list[-1], env.step_output.object_mask_list[-1] )
from gym_ai2thor.envs.mcs_env import McsEnv
from meta_ontroller.meta_controller import MetaController
import sys
from copy import deepcopy
import json
import os
from planner.ff_planner_handler import PlanParser

if __name__ == "__main__":
    env = McsEnv(task="interaction_scenes", scene_type="transferral")
    # env = McsEnv(task="searchObjeInReceptacletraining")
    env.reset()
    # metaController = MetaController(env)
    # result = metaController.excecute()
    # exit(0)

    while env.current_scene < len(env.all_scenes):
        print(env.current_scene)
        metaController = MetaController(env)
        meta_stage = 0
        search_cnt = 0
        while True:
            print("Meta-Stage: {}".format(meta_stage))
            result_plan = metaController.plan_on_current_state()
            for plan in result_plan:
                print(plan)
                break
            if result_plan[0]['action'] == "LookForObjectInReceptacle":
                new_config = deepcopy(metaController.env.scene_config)
                new_config['performerStart']['position'] = {
                    "x": metaController.env.step_output.position['x'],
from gym_ai2thor.envs.mcs_env import McsEnv
from meta_ontroller.meta_controller import MetaController
import sys

if __name__ == "__main__":
    env = McsEnv(task="playroom", scene_type=None, start_scene_number=0)
    metaController = MetaController(env)
    while env.current_scene < len(env.all_scenes) - 1:
        env.reset()
        result = metaController.excecute(replan=False)
        sys.stdout.flush()
from gym_ai2thor.envs.mcs_env import McsEnv
from locomotion.network import Position_Embbedding_Network, HIDDEN_STATE_SIZE, NUM_HIDDEN_LAYER
from locomotion.train import MODEL_SAVE_DIR
from int_phy_recollect_position import get_locomotion_feature
import matplotlib.pyplot as plt
import torch
import os

scene_name = "object_permanence"
start_scene_number = 0
env = McsEnv(task="intphys_scenes", scene_type=scene_name,
             start_scene_number=start_scene_number)

net = Position_Embbedding_Network()
net.eval()
net.load_state_dict(
    torch.load(os.path.join(MODEL_SAVE_DIR,
                            "model_{}layerGRU.pth".format(NUM_HIDDEN_LAYER))))

colors = ['ob', 'og', 'or', 'oc']
for _ in range(30):
    env.reset(random_init=False)
    n_object_hidden_state = {
        obj['id']: [] for obj in env.scene_config['objects']
        if "occluder" not in obj['id']
    }
    obj_seen = {
        obj['id']: False
                    exist_ok=True)
    else:
        os.makedirs(os.path.join(DATA_SAVE_DIR, "without_occluder", shape_type, scene_type),
                    exist_ok=True)

object_locomotions = {}
start_scene_number = 0
for n_restart in range(N_RESTART):
    object_locomotions = {}
    for shape_type in SHAPE_TYPES:
        object_locomotions[shape_type] = []
    env = McsEnv(task="intphys_scenes/validation_intphys_scenes_true",
                 scene_type=scene_type, start_scene_number=start_scene_number)
    start_scene_number += SAVE_SCENE_LENGTH
    for _ in range(SAVE_SCENE_LENGTH):
        env.reset(random_init=False)
        # Only keep scenes with the full 40-step action list.
        if len(env.scene_config['goal']['action_list']) != 40:
            continue
        env_new_objects = []
        env_occluders = []
        env_ramps = []
        # Split scene objects into occluders, ramps, and the objects of interest.
        for obj in env.scene_config['objects']:
            if "occluder" in obj['id']:
                env_occluders.append(obj)
                continue
            if "ramp" in obj['id']:
                env_ramps.append(obj)
        # Keep only the 10 most recent transitions.
        self.episode['obs'] = self.episode['obs'][-10:]
        self.episode['action'] = self.episode['action'][-10:]
        self.episode['next_obs'] = self.episode['next_obs'][-10:]

    @staticmethod
    def preprocess(img):
        # Resize to 50x50, reorder HWC -> CHW, and scale pixels to [0, 1].
        # (Image.ANTIALIAS was removed in Pillow 10; use Image.LANCZOS there.)
        img = img.resize((50, 50), Image.ANTIALIAS)
        return np.transpose(np.array(img), (2, 0, 1)) / 255


if __name__ == "__main__":
    import time

    env = McsEnv()
    domain_file = "planner/domains/Playroom_domain.pddl"
    facts_file = "planner/sample_problems/playroom_facts.pddl"
    parser = PlanParser()
    replay_buffer = []
    metaController = MetaController(env)
    episode = 0
    while episode < 100:
        print("Episode: {}".format(episode))
        env.reset()
        PlanParser.scene_config_to_pddl(env.scene_config,
                                        random_pick_up(env.scene_config), facts_file)
        result_plan = parser.get_plan_from_file(domain_file, facts_file)
        epsd_collector = Episode_collector()
        for action in result_plan:
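# Illustration (assumption, not in the original file): preprocess() maps any RGB
# PIL image to a float array of shape (3, 50, 50) with values in [0, 1].
#
#   from PIL import Image
#   frame = Image.new("RGB", (600, 400))
#   x = Episode_collector.preprocess(frame)
#   assert x.shape == (3, 50, 50) and x.max() <= 1.0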
def test(rank, args, shared_model, counter):
    torch.manual_seed(args.seed + rank)
    env = McsEnv(seed=args.seed + rank, task="interaction_scenes", scene_type="traversal")
    nav_env, navigator, model, _, _ = get_model_from_task(env, args.task)
    nav_env.reset(random_init=True)
    set_object_goal(navigator, env.scene_config)
    model = model.to(args.device)
    model.eval()

    state = navigator.get_observation(nav_env.step_output)
    reward_sum = 0
    done = True

    save = 'steps{}-process{}-lr{}-entropy_coef{}-max_grad_norm{}'.format(
        args.num_steps, args.num_processes, args.lr, args.entropy_coef, args.max_grad_norm)
    save = os.path.join('logs', save)
    os.makedirs(save, exist_ok=True)
    logger = CSVLogger(os.path.join(save, 'test.csv'))
    fields = ['episode_success_rate', 'frames_rendered']
    logger.log(fields)

    start_time = time.time()
    episode_length = 0
    ckpt_counter = 0
    n_test_episode = 40
    while True:
        success_cnt = 0
        for _ in range(n_test_episode):
            while True:
                done_mask = torch.zeros(size=(1, 1)).to(args.device)
                undone_mask = torch.ones(size=(1, 1)).to(args.device)
                episode_length += 1
                if done:
                    # Sync with the shared model at the start of each episode.
                    model.load_state_dict(deepcopy(shared_model.state_dict()))
                    rnn_hidden_states = torch.zeros(
                        size=(model.net.num_recurrent_layers, 1, 512)).to(args.device)
                    prev_action = torch.zeros(1, 1).to(args.device)
                    mask = done_mask
                else:
                    rnn_hidden_states = rnn_hidden_states.detach()

                with torch.no_grad():
                    batch = batch_obs(state, args.device)
                    value, action, action_log_probs, rnn_hidden_states = model.act(
                        batch, rnn_hidden_states, prev_action, mask)
                    # torch.cuda.empty_cache()
                    prev_action.copy_(action)
                    mask = undone_mask

                action_int = action.cpu().numpy()[0][0].item()
                reward, done = navigator.navigation_step_with_reward(
                    nav_env, action_int, episode_length >= args.max_episode_length)
                state = navigator.get_observation(nav_env.step_output)
                reward_sum += reward

                if done:
                    episode_success = (reward == 9.99)
                    if episode_success:
                        success_cnt += 1
                    print("Time {}, num steps over all threads {}, FPS {:.0f}, "
                          "episode reward {: .3f}, success {}, episode length {}".format(
                              time.strftime("%Hh %Mm %Ss",
                                            time.gmtime(time.time() - start_time)),
                              counter.value,
                              counter.value / (time.time() - start_time),
                              reward_sum, episode_success, episode_length))
                    # if args.device != "cpu":
                    #     env, nav_env = check_gpu_usage_and_restart_env(env, nav_env)
                    reward_sum = 0
                    episode_length = 0
                    nav_env.reset(random_init=True)
                    set_object_goal(navigator, env.scene_config)
                    state = navigator.get_observation(nav_env.step_output)
                    break

        torch.save(model.state_dict(), os.path.join(save, "ckpt{}.pth".format(ckpt_counter)))
        logger.log(["{: .2f}".format(success_cnt / n_test_episode), counter.value])
        time.sleep(args.test_sleep_time)
        ckpt_counter += 1
        if ckpt_counter == 48 * 2:
            # Stop after 96 evaluation rounds.
            env.controller.end_scene(None, None)
            logger.close()
            break
    return math.sqrt(x**2 + y**2)


# scene_name = "github_scenes/spatio_temporal_continuity/implausible"
scene_name = "object_permanence"

net = Position_Embbedding_Network()
net.eval()
net.load_state_dict(
    torch.load(os.path.join(MODEL_SAVE_DIR,
                            "model_{}_hidden_state.pth".format(HIDDEN_STATE_SIZE))))

start_scene_number = 0
env_1 = McsEnv(task="intphys_scenes", scene_type=scene_name,
               start_scene_number=start_scene_number)

for _ in range(10):
    env_1.reset(random_init=False)
    env_new_objects = []
    env_occluders = []
    # Separate occluders from the objects whose trajectories are plotted.
    for obj in env_1.scene_config['objects']:
        if "occluder" not in obj['id']:
            env_new_objects.append(obj)
        else:
            env_occluders.append(obj)
    for one_obj in env_new_objects:
        plt.figure(figsize=(6, 4))
        plt.xlim((-5, 5))
if __name__ == '__main__':
    mp.set_start_method("spawn")
    os.environ['OMP_NUM_THREADS'] = '1'
    # os.environ['CUDA_VISIBLE_DEVICES'] = ""

    args = parser.parse_args()
    args.cuda = not args.no_cuda and torch.cuda.is_available()
    if args.cuda:
        print('Using', torch.cuda.get_device_name(0))
        torch.cuda.init()
        args.device = "cuda:0"
    else:
        args.device = "cpu"
    torch.manual_seed(args.seed)

    env = McsEnv()
    _, _, model, train_fun, test_fun = get_model_from_task(env, args.task)
    shared_model = model
    # if args.model:
    #     print("{} loaded".format(args.model))
    #     shared_model.load_state_dict(torch.load(os.path.join(os.getcwd(), args.model)))
    if args.cuda:
        shared_model = shared_model.cuda()
    shared_model.share_memory()
    # env.controller.end_scene(None, None)
    # The env initialisation above is only needed to discover the parameters the
    # model requires.

    optimizer = my_optim.SharedAdam(shared_model.parameters(), lr=args.lr)
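# The remainder of this launcher is truncated. In the standard A3C layout it
# follows, the next step is typically to start one test process and
# args.num_processes training workers sharing `shared_model`. A sketch under
# that assumption (the counter/lock names are guesses consistent with the
# signatures of train() and test() above):
#
#   counter = mp.Value('i', 0)
#   lock = mp.Lock()
#   processes = []
#   p = mp.Process(target=test_fun, args=(args.num_processes, args, shared_model, counter))
#   p.start(); processes.append(p)
#   for rank in range(args.num_processes):
#       p = mp.Process(target=train_fun,
#                      args=(rank, args, shared_model, counter, lock, optimizer))
#       p.start(); processes.append(p)
#   for p in processes:
#       p.join()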
import matplotlib.pyplot as plt
from pathfinding.core.grid import Grid
from pathfinding.finder.a_star import AStarFinder
from pathfinding.core.diagonal_movement import DiagonalMovement
from agent_util import *
from gym_ai2thor.envs.mcs_env import McsEnv
from meta_controller.meta_controller import MetaController
import sys
from frame_collector import Frame_collector

if __name__ == "__main__":
    collector = Frame_collector(scene_dir="intphy_task_img", start_scene_number=0)
    env = McsEnv(task="eval3_dataset", scene_type="agent_obj_preference", seed=50,
                 start_scene_number=0, frame_collector=collector, set_trophy=False)

    # Assumes that these videos are of equal length/frames!
    # mask_cap = cv2.VideoCapture('test_single/50_mask.mkv')
    # color_cap = cv2.VideoCapture('test_single/50_color.mkv')
    M_wall, M_gnd = get_homographies()

    def step(cam_im, mask_im, info, first_frame=False):
        # Project the camera and mask frames into ground and wall planes using
        # the precomputed homographies (helpers come from agent_util).
        gnd_mask = gnd_trans(mask_im, M_gnd)
        gnd_rgb = gnd_trans(cam_im, M_gnd)
        trans_im = wall_trans(cam_im, M_wall)
if __name__ == "__main__": for scene_type in SCENE_TYPES: for _, shape_type in enumerate(SHAPE_TYPES): os.makedirs(os.path.join(DATA_SAVE_DIR, "ground", shape_type, scene_type), exist_ok=True) object_locomotions = {} start_scene_number = 0 for n_restart in range(N_RESTART): object_locomotions = {} for _, shape_type in enumerate(SHAPE_TYPES): object_locomotions[shape_type] = [] env = McsEnv(task="intphys_scenes", scene_type=scene_type, start_scene_number=start_scene_number) start_scene_number += SAVE_SCENE_LENGTH for _ in range(SAVE_SCENE_LENGTH): env.reset(random_init=False) env_new_objects = [] env_occluders = [] env_ramps = [] for obj in env.scene_config['objects']: if "occluder" in obj['id']: env_occluders.append(obj) continue if "ramp" in obj['id']: env_ramps.append(obj) continue if obj['type'] in SHAPE_TYPES: