def learn_graph(args):
    elap = time.time()
    # Do not need to log detailed computation stats
    common.debugger = utils.FakeLogger()
    common.ensure_object_targets(True)
    set_seed(args['seed'])
    task = common.create_env(args['house'],
                             task_name=args['task_name'], false_rate=args['false_rate'],
                             success_measure=args['success_measure'],
                             depth_input=args['depth_input'],
                             target_mask_input=args['target_mask_input'],
                             segment_input=args['segmentation_input'],
                             cacheAllTarget=True, render_device=args['render_gpu'],
                             use_discrete_action=True,
                             include_object_target=True, include_outdoor_target=True,
                             discrete_angle=True)

    # create motion
    __graph_warmstart = args['warmstart']
    args['warmstart'] = args['motion_warmstart']
    motion = create_motion(args, task)

    # create graph
    args['warmstart'] = __graph_warmstart
    graph = GraphPlanner(motion)

    # logger
    logger = utils.MyLogger(args['save_dir'], True)
    logger.print("> Training Mode = {}".format(args['training_mode']))
    logger.print("> Graph Eps = {}".format(args['graph_eps']))
    logger.print("> N_Trials = {}".format(args['n_trials']))
    logger.print("> Max Exploration Steps = {}".format(args['max_exp_steps']))

    # Graph Building
    logger.print('Start Graph Building ...')
    if args['warmstart'] is not None:
        filename = args['warmstart']
        logger.print(' >>> Loading Pre-Trained Graph from {}'.format(filename))
        with open(filename, 'rb') as file:
            g_params = pickle.load(file)
        graph.set_parameters(g_params)
    train_mode = args['training_mode']
    if train_mode in ['mle', 'joint']:
        graph.learn(n_trial=args['n_trials'],
                    max_allowed_steps=args['max_exp_steps'],
                    eps=args['graph_eps'], logger=logger)
    if train_mode in ['evolution', 'joint']:
        graph.evolve()  # TODO: not implemented yet

    logger.print('######## Final Stats ###########')
    graph._show_prior_room(logger=logger)
    graph._show_prior_object(logger=logger)
    return graph
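# ---------------------------------------------------------------------------
# Usage note (hypothetical sketch): learn_graph() expects an `args` dict that
# provides at least the keys read above ('house', 'seed', 'task_name',
# 'false_rate', 'success_measure', 'depth_input', 'target_mask_input',
# 'segmentation_input', 'render_gpu', 'warmstart', 'motion_warmstart',
# 'save_dir', 'training_mode', 'graph_eps', 'n_trials', 'max_exp_steps').
# In the repo these normally come from the command-line parser; the literal
# values below are placeholders, not recommended settings.
#
#   example_args = dict(house=0, seed=0, task_name='roomnav', false_rate=0.0,
#                       success_measure='see', depth_input=False,
#                       target_mask_input=False, segmentation_input='none',
#                       render_gpu=0, warmstart=None, motion_warmstart=None,
#                       save_dir='./_graph_', training_mode='mle',
#                       graph_eps=0.0, n_trials=1000, max_exp_steps=300)
#   graph = learn_graph(example_args)
# ---------------------------------------------------------------------------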
def train(args=None,
          houseID=0, reward_type='indicator', success_measure='center',
          multi_target=False, include_object_target=False,
          algo='pg', model_name='cnn',  # NOTE: optional: model_name='rnn'
          iters=2000000, report_rate=20, save_rate=1000, eval_range=200,
          log_dir='./temp', save_dir='./_model_',
          warmstart=None, log_debug_info=True):
    if args is None:
        args = common.create_default_args(algo)
    scheduler = args['scheduler'] if 'scheduler' in args else None

    hardness = args['hardness']
    max_birthplace_steps = args['max_birthplace_steps']
    if hardness is not None:
        print('>>> Hardness Level = {}'.format(hardness))
    if max_birthplace_steps is not None:
        print('>>>> Max BirthPlace Steps = {}'.format(max_birthplace_steps))

    env = common.create_env(houseID,
                            task_name=args['task_name'], false_rate=args['false_rate'],
                            reward_type=reward_type, hardness=hardness,
                            max_birthplace_steps=max_birthplace_steps,
                            success_measure=success_measure,
                            segment_input=args['segment_input'],
                            depth_input=args['depth_input'],
                            render_device=args['render_gpu'],
                            cacheAllTarget=args['multi_target'],
                            use_discrete_action=('dpg' not in algo),
                            include_object_target=include_object_target)
    trainer = common.create_trainer(algo, model_name, args)
    logger = utils.MyLogger(log_dir, True)

    if multi_target:
        assert hasattr(trainer, 'set_target')

    if warmstart is not None:
        if os.path.exists(warmstart):
            logger.print('Warmstarting from <{}> ...'.format(warmstart))
            trainer.load(warmstart)
        else:
            logger.print('Warmstarting from save_dir <{}> with version <{}> ...'.format(save_dir, warmstart))
            trainer.load(save_dir, warmstart)

    logger.print('Start Training')

    if log_debug_info:
        common.debugger = utils.MyLogger(log_dir, True, 'full_logs.txt')
    else:
        common.debugger = utils.FakeLogger()

    episode_rewards = [0.0]
    episode_success = [0.0]
    episode_length = [0.0]
    episode_targets = ['kitchen']

    trainer.reset_agent()
    if multi_target:
        obs = env.reset()
        target_room = env.info['target_room']
        trainer.set_target(target_room)
        episode_targets[-1] = target_room
    else:
        obs = env.reset(target='kitchen')
    assert not np.any(np.isnan(obs)), 'nan detected in the observation!'
    obs = obs.transpose([1, 0, 2])
    logger.print('Observation Shape = {}'.format(obs.shape))

    episode_step = 0
    t = 0
    best_res = -1e50
    elap = time.time()
    update_times = 0
    print('Starting iterations...')
    try:
        while len(episode_rewards) <= iters:
            idx = trainer.process_observation(obs)

            # get action
            if scheduler is not None:
                noise_level = scheduler.value(len(episode_rewards) - 1)
                action = trainer.action(noise_level)
            else:
                action = trainer.action()
            #proc_action = [np.exp(a) for a in action]

            # environment step
            obs, rew, done, info = env.step(action)
            assert not np.any(np.isnan(obs)), 'nan detected in the observation!'
            obs = obs.transpose([1, 0, 2])
            episode_step += 1
            episode_length[-1] += 1
            terminal = (episode_step >= args['episode_len'])

            # collect experience
            trainer.process_experience(idx, action, rew, done, terminal, info)
            episode_rewards[-1] += rew
            if rew > 5:  # magic number
                episode_success[-1] = 1.0

            if done or terminal:
                trainer.reset_agent()
                if multi_target:
                    obs = env.reset()
                    target_room = env.info['target_room']
                    trainer.set_target(target_room)
                    episode_targets.append(target_room)
                else:
                    obs = env.reset(target='kitchen')
                assert not np.any(np.isnan(obs)), 'nan detected in the observation!'
                obs = obs.transpose([1, 0, 2])
                episode_step = 0
                episode_rewards.append(0)
                episode_success.append(0)
                episode_length.append(0)

            # update all trainers
            trainer.preupdate()
            stats = trainer.update()
            if stats is not None:
                update_times += 1
                if common.debugger is not None:
                    common.debugger.print('>>>>>> Update#{} Finished!!!'.format(update_times), False)

            # save results
            if ((done or terminal) and (len(episode_rewards) % save_rate == 0)) or \
               (len(episode_rewards) > iters):
                trainer.save(save_dir)
                logger.print('Successfully Saved to <{}>'.format(save_dir + '/' + trainer.name + '.pkl'))
                if np.mean(episode_rewards[-eval_range:]) > best_res:
                    best_res = np.mean(episode_rewards[-eval_range:])
                    trainer.save(save_dir, "best")

            # display training output
            if ((update_times % report_rate == 0) and (algo != 'pg') and (stats is not None)) or \
               ((update_times == 0) and (algo != 'pg') and (len(episode_rewards) % 100 == 0) and (done or terminal)) or \
               ((algo == 'pg') and (stats is not None)):
                logger.print('Episode#%d, Updates=%d, Time Elapsed = %.3f min'
                             % (len(episode_rewards), update_times, (time.time() - elap) / 60))
                logger.print('-> Total Samples: %d' % t)
                logger.print('-> Avg Episode Length: %.4f' % (t / len(episode_rewards)))
                if stats is not None:
                    for k in stats:
                        logger.print(' >> %s = %.4f' % (k, stats[k]))
                logger.print(' >> Reward = %.4f' % np.mean(episode_rewards[-eval_range:]))
                logger.print(' >> Success Rate = %.4f' % np.mean(episode_success[-eval_range:]))
                if multi_target:
                    ep_rew = episode_rewards[-eval_range:]
                    ep_suc = episode_success[-eval_range:]
                    ep_tar = episode_targets[-eval_range:]
                    ep_len = episode_length[-eval_range:]
                    total_n = len(ep_rew)
                    tar_stats = dict()
                    for k, r, s, l in zip(ep_tar, ep_rew, ep_suc, ep_len):
                        if k not in tar_stats:
                            tar_stats[k] = [0.0, 0.0, 0.0, 0.0]
                        tar_stats[k][0] += 1
                        tar_stats[k][1] += r
                        tar_stats[k][2] += s
                        tar_stats[k][3] += l
                    for k in tar_stats.keys():
                        n, r, s, l = tar_stats[k]
                        logger.print(' --> Multi-Room<%s> Freq = %.4f, Rew = %.4f, Succ = %.4f (AvgLen = %.3f)'
                                     % (k, n / total_n, r / n, s / n, l / n))
                print('----> Data Loading Time = %.4f min' % (time_counter[-1] / 60))
                print('----> GPU Data Transfer Time = %.4f min' % (time_counter[0] / 60))
                print('----> Training Time = %.4f min' % (time_counter[1] / 60))
                print('----> Target Net Update Time = %.4f min' % (time_counter[2] / 60))

            t += 1
    except KeyboardInterrupt:
        print('Keyboard Interrupt!!!!!!')

    trainer.save(save_dir, "final")
    with open(save_dir + '/final_training_stats.pkl', 'wb') as f:
        pickle.dump([episode_rewards, episode_success, episode_targets, episode_length], f)
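# ---------------------------------------------------------------------------
# Usage note (hypothetical sketch): when `args` is None, train() builds its own
# defaults via common.create_default_args(algo), so a minimal call only needs
# the house id and algorithm:
#
#   train(houseID=0, algo='pg', model_name='cnn',
#         log_dir='./temp', save_dir='./_model_')
#
# Checkpointing, as implemented above: `<save_dir>/<trainer.name>.pkl` every
# `save_rate` episodes, a "best" snapshot whenever the trailing
# `eval_range`-episode mean reward improves, and a "final" snapshot plus
# `final_training_stats.pkl` when training ends or is interrupted.
# ---------------------------------------------------------------------------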
def evaluate_aux_pred(house,
                      seed=0, iters=1000, max_episode_len=10,
                      algo='a3c', model_name='rnn', model_file=None,
                      log_dir='./log/eval', store_history=False,
                      use_batch_norm=True,
                      rnn_units=None, rnn_layers=None, rnn_cell=None,
                      multi_target=True, use_target_gating=False,
                      segmentation_input='none', depth_input=False,
                      resolution='normal'):
    # TODO: currently do not support this
    assert False, 'Aux Prediction Not Supported!'

    # Do not need to log detailed computation stats
    assert algo in ['a3c', 'nop']
    flag_run_random_policy = (algo == 'nop')
    common.debugger = utils.FakeLogger()
    args = common.create_default_args(algo, model=model_name,
                                      use_batch_norm=use_batch_norm,
                                      replay_buffer_size=50,
                                      episode_len=max_episode_len,
                                      rnn_units=rnn_units, rnn_layers=rnn_layers, rnn_cell=rnn_cell,
                                      segmentation_input=segmentation_input,
                                      resolution_level=resolution,
                                      depth_input=depth_input,
                                      history_frame_len=1)
    # TODO: add code for evaluation aux-task (concept learning)
    args['multi_target'] = multi_target
    args['target_gating'] = use_target_gating
    args['aux_task'] = True

    import zmq_train
    set_seed(seed)
    env = common.create_env(house, hardness=1e-8, success_measure='stay',
                            depth_input=depth_input,
                            segment_input=args['segment_input'],
                            genRoomTypeMap=True, cacheAllTarget=True,
                            use_discrete_action=True)
    trainer = zmq_train.create_zmq_trainer(algo, model_name, args)
    if model_file is not None:
        trainer.load(model_file)
    trainer.eval()  # evaluation mode

    logger = utils.MyLogger(log_dir, True)
    logger.print('Start Evaluating Auxiliary Task ...')
    logger.print(' --> Episode (Left) Turning Steps = {}'.format(max_episode_len))

    episode_err = []
    episode_succ = []
    episode_good = []
    episode_rews = []
    episode_stats = []
    elap = time.time()

    for it in range(iters):
        trainer.reset_agent()
        set_seed(seed + it + 1)  # reset seed
        obs = env.reset() if multi_target else env.reset(target=env.get_current_target())
        target_id = common.target_instruction_dict[env.get_current_target()]
        if multi_target and hasattr(trainer, 'set_target'):
            trainer.set_target(env.get_current_target())
        cur_infos = []
        if store_history:
            cur_infos.append(proc_info(env.info))
            # cur_images.append(env.render(renderMapLoc=env.cam_info['loc'], display=False))
        if model_name != 'rnn':
            obs = obs.transpose([1, 0, 2])
        episode_succ.append(0)
        episode_err.append(0)
        episode_good.append(0)
        cur_rew = []
        cur_pred = []
        if flag_run_random_policy:
            predefined_aux_pred = common.all_aux_predictions[random.choice(common.all_target_instructions)]
        for _st in range(max_episode_len):
            # get action
            if flag_run_random_policy:
                aux_pred = predefined_aux_pred
            else:
                if multi_target:
                    _, _, aux_prob = trainer.action(obs, return_numpy=True,
                                                    target=[[target_id]],
                                                    return_aux_pred=True,
                                                    return_aux_logprob=False)
                else:
                    _, _, aux_prob = trainer.action(obs, return_numpy=True,
                                                    return_aux_pred=True,
                                                    return_aux_logprob=False)
                aux_prob = aux_prob.squeeze()  # [n_pred]
                aux_pred = int(np.argmax(aux_prob))  # greedy action, takes the output with the maximum confidence
            aux_rew = trainer.get_aux_task_reward(aux_pred, env.get_current_room_pred_mask())
            cur_rew.append(aux_rew)
            cur_pred.append(common.all_aux_prediction_list[aux_pred])
            if aux_rew < 0:
                episode_err[-1] += 1
            if aux_rew >= 0.9:  # currently a hack
                episode_succ[-1] += 1
            if aux_rew > 0:
                episode_good[-1] += 1
            action = 5  # Left Rotation

            # environment step
            obs, rew, done, info = env.step(action)
            if store_history:
                cur_infos.append(proc_info(info))
                cur_infos[-1]['aux_pred'] = cur_pred
                #cur_images.append(env.render(renderMapLoc=env.cam_info['loc'], display=False))
            if model_name != 'rnn':
                obs = obs.transpose([1, 0, 2])

        if episode_err[-1] > 0:
            episode_succ[-1] = 0
        room_mask = env.get_current_room_pred_mask()
        cur_room_types = []
        for i in range(common.n_aux_predictions):
            if (room_mask & (1 << i)) > 0:
                cur_room_types.append(common.all_aux_prediction_list[i])
        cur_stats = dict(err=episode_err[-1], good=episode_good[-1], succ=episode_succ[-1],
                         rew=cur_rew,
                         err_rate=episode_err[-1] / max_episode_len,
                         good_rate=episode_good[-1] / max_episode_len,
                         succ_rate=episode_succ[-1] / max_episode_len,
                         target=env.get_current_target(),
                         mask=room_mask, room_types=cur_room_types,
                         length=max_episode_len)
        if store_history:
            cur_stats['infos'] = cur_infos
        episode_stats.append(cur_stats)

        dur = time.time() - elap
        logger.print('Episode#%d, Elapsed = %.3f min' % (it + 1, dur / 60))
        logger.print(' ---> Target Room = {}'.format(cur_stats['target']))
        logger.print(' ---> Aux Rew = {}'.format(cur_rew))
        if (episode_succ[-1] > 0) and (episode_err[-1] == 0):
            logger.print(' >>>> Success!')
        elif episode_err[-1] == 0:
            logger.print(' >>>> Good!')
        else:
            logger.print(' >>>> Failed!')
        logger.print(" ---> Indep. Prediction: Succ Rate = %.3f, Good Rate = %.3f, Err Rate = %.3f"
                     % (episode_succ[-1] * 100.0 / max_episode_len,
                        episode_good[-1] * 100.0 / max_episode_len,
                        episode_err[-1] * 100.0 / max_episode_len))
        logger.print(" > Accu. Succ = %.3f, Good = %.3f, Fail = %.3f"
                     % (float(np.mean([float(s == max_episode_len) for s in episode_succ])) * 100.0,
                        float(np.mean([float(e == 0) for e in episode_err])) * 100,
                        float(np.mean([float(e > 0) for e in episode_err])) * 100))
        logger.print(" > Accu. Rate: Succ Rate = %.3f, Good Rate = %.3f, Fail Rate = %.3f"
                     % (float(np.mean([s / max_episode_len for s in episode_succ])) * 100.0,
                        float(np.mean([g / max_episode_len for g in episode_good])) * 100,
                        float(np.mean([e / max_episode_len for e in episode_err])) * 100))
    return episode_stats
def evaluate(house,
             seed=0, render_device=None, iters=1000, max_episode_len=1000,
             task_name='roomnav', false_rate=0.0,
             hardness=None, max_birthplace_steps=None, success_measure='center',
             multi_target=False, fixed_target=None,
             algo='nop', model_name='cnn', model_file=None,
             log_dir='./log/eval', store_history=False,
             use_batch_norm=True,
             rnn_units=None, rnn_layers=None, rnn_cell=None,
             use_action_gating=False, use_residual_critic=False, use_target_gating=False,
             segmentation_input='none', depth_input=False, target_mask_input=False,
             resolution='normal', history_len=4,
             include_object_target=False, include_outdoor_target=True,
             aux_task=False, no_skip_connect=False, feed_forward=False,
             greedy_execution=False, greedy_aux_pred=False):
    assert not aux_task, 'Do not support Aux-Task now!'

    elap = time.time()
    # Do not need to log detailed computation stats
    common.debugger = utils.FakeLogger()
    args = common.create_default_args(algo, model=model_name,
                                      use_batch_norm=use_batch_norm,
                                      replay_buffer_size=50,
                                      episode_len=max_episode_len,
                                      rnn_units=rnn_units, rnn_layers=rnn_layers, rnn_cell=rnn_cell,
                                      segmentation_input=segmentation_input,
                                      resolution_level=resolution,
                                      depth_input=depth_input,
                                      target_mask_input=target_mask_input,
                                      history_frame_len=history_len)
    args['action_gating'] = use_action_gating
    args['residual_critic'] = use_residual_critic
    args['multi_target'] = multi_target
    args['object_target'] = include_object_target
    args['target_gating'] = use_target_gating
    args['aux_task'] = aux_task
    args['no_skip_connect'] = no_skip_connect
    args['feed_forward'] = feed_forward

    if (fixed_target is not None) and (fixed_target not in ['any-room', 'any-object']):
        assert fixed_target in common.all_target_instructions, 'invalid fixed target <{}>'.format(fixed_target)

    __backup_CFG = common.CFG.copy()
    if fixed_target == 'any-room':
        common.ensure_object_targets(False)

    if hardness is not None:
        print('>>>> Hardness = {}'.format(hardness))
    if max_birthplace_steps is not None:
        print('>>>> Max BirthPlace Steps = {}'.format(max_birthplace_steps))
    set_seed(seed)
    env = common.create_env(house,
                            task_name=task_name, false_rate=false_rate,
                            hardness=hardness, max_birthplace_steps=max_birthplace_steps,
                            success_measure=success_measure,
                            depth_input=depth_input,
                            target_mask_input=target_mask_input,
                            segment_input=args['segment_input'],
                            genRoomTypeMap=aux_task,
                            cacheAllTarget=multi_target,
                            render_device=render_device,
                            use_discrete_action=('dpg' not in algo),
                            include_object_target=include_object_target and (fixed_target != 'any-room'),
                            include_outdoor_target=include_outdoor_target,
                            discrete_angle=True)

    if (fixed_target is not None) and (fixed_target != 'any-room') and (fixed_target != 'any-object'):
        env.reset_target(fixed_target)

    if fixed_target == 'any-room':
        common.CFG = __backup_CFG
        common.ensure_object_targets(True)

    # create model
    if model_name == 'rnn':
        import zmq_train
        trainer = zmq_train.create_zmq_trainer(algo, model_name, args)
    else:
        trainer = common.create_trainer(algo, model_name, args)
    if model_file is not None:
        trainer.load(model_file)
    trainer.eval()  # evaluation mode
    if greedy_execution and hasattr(trainer, 'set_greedy_execution'):
        trainer.set_greedy_execution()
    else:
        print('[Eval] WARNING!!! Greedy Policy Execution NOT Available!!!')
        greedy_execution = False
    if greedy_aux_pred and hasattr(trainer, 'set_greedy_aux_prediction'):
        trainer.set_greedy_aux_prediction()
    else:
        print('[Eval] WARNING!!! Greedy Execution of Auxiliary Task NOT Available!!!')
        greedy_aux_pred = False

    if aux_task:
        assert trainer.is_rnn()  # only rnn support aux_task

    #flag_random_reset_target = multi_target and (fixed_target is None)
    logger = utils.MyLogger(log_dir, True)
    logger.print('Start Evaluating ...')

    episode_success = []
    episode_good = []
    episode_stats = []
    t = 0
    for it in range(iters):
        cur_infos = []
        trainer.reset_agent()
        set_seed(seed + it + 1)  # reset seed
        obs = env.reset(target=fixed_target)
        #if multi_target and (fixed_target is not None) and (fixed_target != 'kitchen'):
        #    # TODO: Currently a hacky solution
        #    env.reset(target=fixed_target)
        #    if house < 0:  # multi-house env
        #        obs = env.reset(reset_target=False, keep_world=True)
        #    else:
        #        obs = env.reset(reset_target=False)
        #else:
        #    # TODO: Only support multi-target + fixed kitchen; or fixed-target (kitchen)
        #    obs = env.reset(reset_target=flag_random_reset_target)
        target_id = common.target_instruction_dict[env.get_current_target()]
        if multi_target and hasattr(trainer, 'set_target'):
            trainer.set_target(env.get_current_target())
        if store_history:
            cur_infos.append(proc_info(env.info))
            #cur_images.append(env.render(renderMapLoc=env.cam_info['loc'], display=False))
        if model_name != 'rnn':
            obs = obs.transpose([1, 0, 2])
        episode_success.append(0)
        episode_good.append(0)
        cur_stats = dict(best_dist=1e50,
                         success=0, good=0, reward=0,
                         target=env.get_current_target(),
                         meters=env.info['meters'],
                         optstep=env.info['optsteps'],
                         length=max_episode_len, images=None)
        if aux_task:
            cur_stats['aux_pred_rew'] = 0
            cur_stats['aux_pred_err'] = 0
        if hasattr(env.house, "_id"):
            cur_stats['world_id'] = env.house._id
        episode_step = 0
        for _st in range(max_episode_len):
            # get action
            if trainer.is_rnn():
                idx = 0
                if multi_target:
                    if aux_task:
                        action, _, aux_pred = trainer.action(obs, return_numpy=True,
                                                             target=[[target_id]],
                                                             return_aux_pred=True)
                    else:
                        action, _ = trainer.action(obs, return_numpy=True, target=[[target_id]])
                else:
                    if aux_task:
                        action, _, aux_pred = trainer.action(obs, return_numpy=True, return_aux_pred=True)
                    else:
                        action, _ = trainer.action(obs, return_numpy=True)
                action = action.squeeze()
                if greedy_execution:
                    action = int(np.argmax(action))
                else:
                    action = int(action)
                if aux_task:
                    aux_pred = aux_pred.squeeze()
                    if greedy_aux_pred:
                        aux_pred = int(np.argmax(aux_pred))
                    else:
                        aux_pred = int(aux_pred)
                    aux_rew = trainer.get_aux_task_reward(aux_pred, env.get_current_room_pred_mask())
                    cur_stats['aux_pred_rew'] += aux_rew
                    if aux_rew < 0:
                        cur_stats['aux_pred_err'] += 1
            else:
                idx = trainer.process_observation(obs)
                action = trainer.action(None if greedy_execution else 1.0)  # use gumbel noise

            # environment step
            obs, rew, done, info = env.step(action)
            if store_history:
                cur_infos.append(proc_info(info))
                #cur_images.append(env.render(renderMapLoc=env.cam_info['loc'], display=False))
            if model_name != 'rnn':
                obs = obs.transpose([1, 0, 2])
            cur_dist = info['dist']
            if cur_dist == 0:
                cur_stats['good'] += 1
                episode_good[-1] = 1
            t += 1
            if cur_dist < cur_stats['best_dist']:
                cur_stats['best_dist'] = cur_dist
            episode_step += 1

            # collect experience
            trainer.process_experience(idx, action, rew, done, (_st + 1 >= max_episode_len), info)

            if done:
                if rew > 5:  # magic number
                    episode_success[-1] = 1
                    cur_stats['success'] = 1
                cur_stats['length'] = episode_step
                if aux_task:
                    cur_stats['aux_pred_err'] /= episode_step
                    cur_stats['aux_pred_rew'] /= episode_step
                break

        if store_history:
            cur_stats['infos'] = cur_infos
        episode_stats.append(cur_stats)

        dur = time.time() - elap
        logger.print('Episode#%d, Elapsed = %.3f min' % (it + 1, dur / 60))
        if multi_target:
            logger.print(' ---> Target Room = {}'.format(cur_stats['target']))
        logger.print(' ---> Total Samples = {}'.format(t))
        logger.print(' ---> Success = %d (rate = %.3f)' % (cur_stats['success'], np.mean(episode_success)))
        logger.print(' ---> Times of Reaching Target Room = %d (rate = %.3f)'
                     % (cur_stats['good'], np.mean(episode_good)))
        logger.print(' ---> Best Distance = %d' % cur_stats['best_dist'])
        logger.print(' ---> Birth-place Distance = %d' % cur_stats['optstep'])
        if aux_task:
            logger.print(' >>>>>> Aux-Task: Avg Rew = %.4f, Avg Err = %.4f'
                         % (cur_stats['aux_pred_rew'], cur_stats['aux_pred_err']))

    logger.print('######## Final Stats ###########')
    logger.print('Success Rate = %.3f' % np.mean(episode_success))
    logger.print('> Avg Ep-Length per Success = %.3f'
                 % np.mean([s['length'] for s in episode_stats if s['success'] > 0]))
    logger.print('> Avg Birth-Meters per Success = %.3f'
                 % np.mean([s['meters'] for s in episode_stats if s['success'] > 0]))
    logger.print('Reaching Target Rate = %.3f' % np.mean(episode_good))
    logger.print('> Avg Ep-Length per Target Reach = %.3f'
                 % np.mean([s['length'] for s in episode_stats if s['good'] > 0]))
    logger.print('> Avg Birth-Meters per Target Reach = %.3f'
                 % np.mean([s['meters'] for s in episode_stats if s['good'] > 0]))
    if multi_target:
        all_targets = list(set([s['target'] for s in episode_stats]))
        for tar in all_targets:
            n = sum([1.0 for s in episode_stats if s['target'] == tar])
            succ = [float(s['success'] > 0) for s in episode_stats if s['target'] == tar]
            good = [float(s['good'] > 0) for s in episode_stats if s['target'] == tar]
            length = [s['length'] for s in episode_stats if s['target'] == tar]
            meters = [s['meters'] for s in episode_stats if s['target'] == tar]
            good_len = np.mean([l for l, g in zip(length, good) if g > 0.5])
            succ_len = np.mean([l for l, s in zip(length, succ) if s > 0.5])
            good_mts = np.mean([l for l, g in zip(meters, good) if g > 0.5])
            succ_mts = np.mean([l for l, s in zip(meters, succ) if s > 0.5])
            logger.print('>>>>> Multi-Target <%s>: Rate = %.3f (n=%d), Good = %.3f (AvgLen=%.3f; Mts=%.3f), Succ = %.3f (AvgLen=%.3f; Mts=%.3f)'
                         % (tar, n / len(episode_stats), n,
                            np.mean(good), good_len, good_mts,
                            np.mean(succ), succ_len, succ_mts))
    if aux_task:
        logger.print(' -->>> Auxiliary-Task: Mean Episode Avg Rew = %.6f, Mean Episode Avg Err = %.6f'
                     % (np.mean([float(s['aux_pred_rew']) for s in episode_stats]),
                        np.mean([float(s['aux_pred_err']) for s in episode_stats])))
    return episode_stats
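# ---------------------------------------------------------------------------
# Usage note: evaluate() returns `episode_stats`, a list with one dict per
# episode. Based on the code above, each dict carries at least 'best_dist',
# 'success', 'good', 'reward', 'target', 'meters', 'optstep' and 'length',
# plus 'world_id' when the house exposes `_id` and 'infos' when
# store_history=True. A typical post-processing step (hypothetical sketch;
# the model path is a placeholder):
#
#   stats = evaluate(house=0, iters=100, algo='a3c', model_name='rnn',
#                    model_file='./model/best.pkl', multi_target=True)
#   success_rate = sum(s['success'] for s in stats) / len(stats)
# ---------------------------------------------------------------------------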
def evaluate(args):
    elap = time.time()
    # Do not need to log detailed computation stats
    common.debugger = utils.FakeLogger()

    # ensure observation shape
    common.process_observation_shape('rnn', args['resolution'],
                                     segmentation_input=args['segmentation_input'],
                                     depth_input=args['depth_input'],
                                     history_frame_len=1,
                                     target_mask_input=args['target_mask_input'])

    fixed_target = args['fixed_target']
    if (fixed_target is not None) and (fixed_target != 'any-room') and (fixed_target != 'any-object'):
        assert fixed_target in common.all_target_instructions, 'invalid fixed target <{}>'.format(fixed_target)

    __backup_CFG = common.CFG.copy()
    if fixed_target == 'any-room':
        common.ensure_object_targets(False)

    if args['hardness'] is not None:
        print('>>>> Hardness = {}'.format(args['hardness']))
    if args['max_birthplace_steps'] is not None:
        print('>>>> Max BirthPlace Steps = {}'.format(args['max_birthplace_steps']))
    set_seed(args['seed'])
    task = common.create_env(args['house'],
                             task_name=args['task_name'], false_rate=args['false_rate'],
                             hardness=args['hardness'],
                             max_birthplace_steps=args['max_birthplace_steps'],
                             success_measure=args['success_measure'],
                             depth_input=args['depth_input'],
                             target_mask_input=args['target_mask_input'],
                             segment_input=args['segmentation_input'],
                             genRoomTypeMap=False,
                             cacheAllTarget=args['multi_target'],
                             render_device=args['render_gpu'],
                             use_discrete_action=True,
                             include_object_target=args['object_target'] and (fixed_target != 'any-room'),
                             include_outdoor_target=args['outdoor_target'],
                             discrete_angle=True,
                             min_birthplace_grids=args['min_birthplace_grids'])

    if (fixed_target is not None) and (fixed_target != 'any-room') and (fixed_target != 'any-object'):
        task.reset_target(fixed_target)

    if fixed_target == 'any-room':
        common.CFG = __backup_CFG
        common.ensure_object_targets(True)

    # create semantic classifier
    if args['semantic_dir'] is not None:
        assert os.path.exists(args['semantic_dir']), \
            '[Error] Semantic Dir <{}> not exists!'.format(args['semantic_dir'])
        assert not args['object_target'], '[ERROR] currently do not support --object-target!'
        print('Loading Semantic Oracle from dir <{}>...'.format(args['semantic_dir']))
        if args['semantic_gpu'] is None:
            args['semantic_gpu'] = common.get_gpus_for_rendering()[0]
        oracle = SemanticOracle(model_dir=args['semantic_dir'],
                                model_device=args['semantic_gpu'],
                                include_object=args['object_target'])
        oracle_func = OracleFunction(oracle,
                                     threshold=args['semantic_threshold'],
                                     filter_steps=args['semantic_filter_steps'])
    else:
        oracle_func = None

    # create motion
    motion = create_motion(args, task, oracle_func)

    logger = utils.MyLogger(args['log_dir'], True)
    logger.print('Start Evaluating ...')

    episode_success = []
    episode_good = []
    episode_stats = []
    t = 0
    seed = args['seed']
    max_episode_len = args['max_episode_len']
    plan_req = args['plan_dist_iters'] if 'plan_dist_iters' in args else None

    for it in range(args['max_iters']):
        cur_infos = []
        motion.reset()
        set_seed(seed + it + 1)  # reset seed
        if plan_req is not None:
            while True:
                task.reset(target=fixed_target)
                m = len(task.get_optimal_plan())
                if (m in plan_req) and plan_req[m] > 0:
                    break
            plan_req[m] -= 1
        else:
            task.reset(target=fixed_target)
        info = task.info

        episode_success.append(0)
        episode_good.append(0)
        cur_stats = dict(best_dist=info['dist'],
                         success=0, good=0, reward=0,
                         target=task.get_current_target(),
                         meters=task.info['meters'],
                         optstep=task.info['optsteps'],
                         length=max_episode_len, images=None)
        if hasattr(task.house, "_id"):
            cur_stats['world_id'] = task.house._id

        store_history = args['store_history']
        if store_history:
            cur_infos.append(proc_info(task.info))

        if args['temperature'] is not None:
            ep_data = motion.run(task.get_current_target(), max_episode_len,
                                 temperature=args['temperature'])
        else:
            ep_data = motion.run(task.get_current_target(), max_episode_len)

        for dat in ep_data:
            info = dat[4]
            if store_history:
                cur_infos.append(proc_info(info))
            cur_dist = info['dist']
            if cur_dist == 0:
                cur_stats['good'] += 1
                episode_good[-1] = 1
            if cur_dist < cur_stats['best_dist']:
                cur_stats['best_dist'] = cur_dist

        episode_step = len(ep_data)
        if ep_data[-1][3]:  # done
            if ep_data[-1][2] > 5:  # magic number
                episode_success[-1] = 1
                cur_stats['success'] = 1
            cur_stats['length'] = episode_step  # store length
        if store_history:
            cur_stats['infos'] = cur_infos
        episode_stats.append(cur_stats)

        dur = time.time() - elap
        logger.print('Episode#%d, Elapsed = %.3f min' % (it + 1, dur / 60))
        if args['multi_target']:
            logger.print(' ---> Target Room = {}'.format(cur_stats['target']))
        logger.print(' ---> Total Samples = {}'.format(t))
        logger.print(' ---> Success = %d (rate = %.3f)' % (cur_stats['success'], np.mean(episode_success)))
        logger.print(' ---> Times of Reaching Target Room = %d (rate = %.3f)'
                     % (cur_stats['good'], np.mean(episode_good)))
        logger.print(' ---> Best Distance = %d' % cur_stats['best_dist'])
        logger.print(' ---> Birth-place Distance = %d' % cur_stats['optstep'])

    logger.print('######## Final Stats ###########')
    logger.print('Success Rate = %.3f' % np.mean(episode_success))
    logger.print('> Avg Ep-Length per Success = %.3f'
                 % np.mean([s['length'] for s in episode_stats if s['success'] > 0]))
    logger.print('> Avg Birth-Meters per Success = %.3f'
                 % np.mean([s['meters'] for s in episode_stats if s['success'] > 0]))
    logger.print('Reaching Target Rate = %.3f' % np.mean(episode_good))
    logger.print('> Avg Ep-Length per Target Reach = %.3f'
                 % np.mean([s['length'] for s in episode_stats if s['good'] > 0]))
    logger.print('> Avg Birth-Meters per Target Reach = %.3f'
                 % np.mean([s['meters'] for s in episode_stats if s['good'] > 0]))
    if args['multi_target']:
        all_targets = list(set([s['target'] for s in episode_stats]))
        for tar in all_targets:
            n = sum([1.0 for s in episode_stats if s['target'] == tar])
            succ = [float(s['success'] > 0) for s in episode_stats if s['target'] == tar]
            good = [float(s['good'] > 0) for s in episode_stats if s['target'] == tar]
            length = [s['length'] for s in episode_stats if s['target'] == tar]
            meters = [s['meters'] for s in episode_stats if s['target'] == tar]
            good_len = np.mean([l for l, g in zip(length, good) if g > 0.5])
            succ_len = np.mean([l for l, s in zip(length, succ) if s > 0.5])
            good_mts = np.mean([l for l, g in zip(meters, good) if g > 0.5])
            succ_mts = np.mean([l for l, s in zip(meters, succ) if s > 0.5])
            logger.print('>>>>> Multi-Target <%s>: Rate = %.3f (n=%d), Good = %.3f (AvgLen=%.3f; Mts=%.3f), Succ = %.3f (AvgLen=%.3f; Mts=%.3f)'
                         % (tar, n / len(episode_stats), n,
                            np.mean(good), good_len, good_mts,
                            np.mean(succ), succ_len, succ_mts))
    return episode_stats
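# ---------------------------------------------------------------------------
# Usage note: if `args` contains 'plan_dist_iters', it is treated as a mutable
# quota dict mapping optimal-plan length -> number of episodes still wanted
# with that length; the reset loop above keeps re-sampling the task until it
# draws a plan length whose quota is positive, then decrements that quota.
# A hypothetical quota asking for 50 episodes each of plan length 1, 2 and 3:
#
#   args['plan_dist_iters'] = {1: 50, 2: 50, 3: 50}
# ---------------------------------------------------------------------------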
def __init__(self,
             k=0, task_name='roomnav', false_rate=0.0,
             reward_type='indicator', reward_silence=0, success_measure='see',
             multi_target=True, include_object_target=True, fixed_target=None,
             aux_task=False,
             hardness=None, max_birthplace_steps=None, min_birthplace_grids=0,
             curriculum_schedule=None,
             segment_input='none', depth_input=False, target_mask_input=False,
             cache_supervision=False, include_outdoor_target=True,
             mask_feature_dim=None, max_steps=-1, device=0):
    assert k >= 0
    init_birthplace = max_birthplace_steps if curriculum_schedule is None else curriculum_schedule[0]
    self.env = common.create_env(k,
                                 task_name=task_name, false_rate=false_rate,
                                 reward_type=reward_type,
                                 hardness=hardness,
                                 max_birthplace_steps=init_birthplace,
                                 success_measure=success_measure,
                                 segment_input=segment_input,
                                 depth_input=depth_input,
                                 target_mask_input=target_mask_input,
                                 max_steps=max_steps,
                                 render_device=device,
                                 genRoomTypeMap=aux_task,
                                 cacheAllTarget=multi_target,
                                 include_object_target=include_object_target,
                                 use_discrete_action=True,  # assume A3C with discrete actions
                                 reward_silence=reward_silence,
                                 #curriculum_schedule=curriculum_schedule,
                                 cache_supervision=cache_supervision,
                                 include_outdoor_target=include_outdoor_target,
                                 min_birthplace_grids=min_birthplace_grids)
    self.obs = self.env.reset() if multi_target else self.env.reset(target='kitchen')
    self.done = False
    self.multi_target = multi_target
    self.fixed_target = fixed_target
    self.aux_task = aux_task
    self.supervision = cache_supervision
    self.mask_feature_dim = mask_feature_dim
    self._mask_feature = self._get_mask_feature() if mask_feature_dim is not None else None
    self._sup_act = self.env.info['supervision'] if self.supervision else None
    if self.aux_task:
        #self._aux_target = self.env.get_current_room_pred_mask()  # TODO: Currently do not support aux room pred
        assert False, 'Aux Room Prediction Currently Not Supported!'
    self._target = common.target_instruction_dict[self.env.get_current_target()]
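# ---------------------------------------------------------------------------
# Note: after construction this wrapper exposes self.obs (initial observation),
# self.done, and self._target (the instruction id of the current target), plus
# self._mask_feature / self._sup_act when mask features or cached supervision
# are enabled. When a curriculum_schedule is given, its first entry overrides
# max_birthplace_steps for the initial curriculum stage.
# ---------------------------------------------------------------------------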
def learn_controller(args):
    elap = time.time()
    # Do not need to log detailed computation stats
    common.debugger = utils.FakeLogger()
    if args['object_target']:
        common.ensure_object_targets()
    set_seed(args['seed'])
    task = common.create_env(args['house'],
                             task_name=args['task_name'], false_rate=args['false_rate'],
                             success_measure=args['success_measure'],
                             depth_input=args['depth_input'],
                             target_mask_input=args['target_mask_input'],
                             segment_input=args['segmentation_input'],
                             cacheAllTarget=True, render_device=args['render_gpu'],
                             use_discrete_action=True,
                             include_object_target=args['object_target'],
                             include_outdoor_target=args['outdoor_target'],
                             discrete_angle=True)

    # create motion
    __controller_warmstart = args['warmstart']
    args['warmstart'] = args['motion_warmstart']
    motion = create_motion(args, task)
    args['warmstart'] = __controller_warmstart

    # logger
    logger = utils.MyLogger(args['save_dir'], True)
    logger.print("> Planner Units = {}".format(args['units']))
    logger.print("> Max Planner Steps = {}".format(args['max_planner_steps']))
    logger.print("> Max Exploration Steps = {}".format(args['max_exp_steps']))
    logger.print("> Reward = {} & {}".format(args['time_penalty'], args['success_reward']))

    # Planner Learning
    logger.print('Start RNN Planner Learning ...')
    planner = RNNPlanner(motion, args['units'], args['warmstart'])
    fixed_target = None
    if args['only_eval_room']:
        fixed_target = 'any-room'
    elif args['only_eval_object']:
        fixed_target = 'any-object'
    train_stats, eval_stats = \
        planner.learn(args['iters'], args['max_episode_len'],
                      target=fixed_target,
                      motion_steps=args['max_exp_steps'],
                      planner_steps=args['max_planner_steps'],
                      batch_size=args['batch_size'],
                      lrate=args['lrate'], grad_clip=args['grad_clip'],
                      weight_decay=args['weight_decay'], gamma=args['gamma'],
                      entropy_penalty=args['entropy_penalty'],
                      save_dir=args['save_dir'],
                      report_rate=5, eval_rate=20, save_rate=100,
                      logger=logger, seed=args['seed'])

    logger.print('######## Done ###########')
    filename = args['save_dir']
    if filename[-1] != '/':
        filename = filename + '/'
    filename = filename + 'train_stats.pkl'
    with open(filename, 'wb') as f:
        pickle.dump([train_stats, eval_stats], f)
    logger.print(' --> Training Stats Saved to <{}>!'.format(filename))
    return planner
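# ---------------------------------------------------------------------------
# Usage note: learn_controller() trains an RNNPlanner on top of a pre-trained
# motion policy (args['motion_warmstart']) and returns the planner object.
# Training/evaluation curves are pickled to `<save_dir>/train_stats.pkl` as
# the pair [train_stats, eval_stats], so they can be reloaded later, e.g.:
#
#   with open(os.path.join(args['save_dir'], 'train_stats.pkl'), 'rb') as f:
#       train_stats, eval_stats = pickle.load(f)
# ---------------------------------------------------------------------------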
def evaluate(args, data_saver=None):
    args['segment_input'] = args['segmentation_input']
    backup_rate = args['backup_rate']

    elap = time.time()
    # Do not need to log detailed computation stats
    common.debugger = utils.FakeLogger()

    # ensure observation shape
    common.process_observation_shape('rnn', args['resolution'],
                                     args['segmentation_input'],
                                     args['depth_input'],
                                     target_mask_input=args['target_mask_input'])

    fixed_target = args['fixed_target']
    if (fixed_target is not None) and (fixed_target != 'any-room') and (fixed_target != 'any-object'):
        assert fixed_target in common.all_target_instructions, 'invalid fixed target <{}>'.format(fixed_target)

    __backup_CFG = common.CFG.copy()
    if fixed_target == 'any-room':
        common.ensure_object_targets(False)

    if args['hardness'] is not None:
        print('>>>> Hardness = {}'.format(args['hardness']))
    if args['max_birthplace_steps'] is not None:
        print('>>>> Max BirthPlace Steps = {}'.format(args['max_birthplace_steps']))
    set_seed(args['seed'])
    task = common.create_env(args['house'],
                             task_name=args['task_name'], false_rate=args['false_rate'],
                             hardness=args['hardness'],
                             max_birthplace_steps=args['max_birthplace_steps'],
                             success_measure=args['success_measure'],
                             depth_input=args['depth_input'],
                             target_mask_input=args['target_mask_input'],
                             segment_input=args['segmentation_input'],
                             genRoomTypeMap=False,
                             cacheAllTarget=args['multi_target'],
                             render_device=args['render_gpu'],
                             use_discrete_action=True,
                             include_object_target=args['object_target'] and (fixed_target != 'any-room'),
                             include_outdoor_target=args['outdoor_target'],
                             discrete_angle=True,
                             min_birthplace_grids=args['min_birthplace_grids'])

    if (fixed_target is not None) and (fixed_target != 'any-room') and (fixed_target != 'any-object'):
        task.reset_target(fixed_target)

    if fixed_target == 'any-room':
        common.CFG = __backup_CFG
        common.ensure_object_targets(True)

    # logger
    logger = utils.MyLogger(args['log_dir'], True)
    logger.print('Start Evaluating ...')

    # create semantic classifier
    if args['semantic_dir'] is not None:
        assert os.path.exists(args['semantic_dir']), \
            '[Error] Semantic Dir <{}> not exists!'.format(args['semantic_dir'])
        assert not args['object_target'], '[ERROR] currently do not support --object-target!'
        print('Loading Semantic Oracle from dir <{}>...'.format(args['semantic_dir']))
        if args['semantic_gpu'] is None:
            args['semantic_gpu'] = common.get_gpus_for_rendering()[0]
        oracle = SemanticOracle(model_dir=args['semantic_dir'],
                                model_device=args['semantic_gpu'],
                                include_object=args['object_target'])
        oracle_func = OracleFunction(oracle,
                                     threshold=args['semantic_threshold'],
                                     filter_steps=args['semantic_filter_steps'],
                                     batched_size=args['semantic_batch_size'])
    else:
        oracle_func = None

    # create motion
    motion = create_motion(args, task, oracle_func=oracle_func)
    if args['motion'] == 'random':
        motion.set_skilled_rate(args['random_motion_skill'])
    flag_interrupt = args['interruptive_motion']

    # create planner
    graph = None
    max_motion_steps = args['n_exp_steps']
    if (args['planner'] is None) or (args['planner'] == 'void'):
        graph = VoidPlanner(motion)
    elif args['planner'] == 'oracle':
        graph = OraclePlanner(motion)
    elif args['planner'] == 'rnn':
        #assert False, 'Currently only support Graph-planner'
        graph = RNNPlanner(motion, args['planner_units'], args['planner_filename'],
                           oracle_func=oracle_func)
    else:
        graph = GraphPlanner(motion)
        if not args['outdoor_target']:
            graph.add_excluded_target('outdoor')
        filename = args['planner_filename']
        if filename == 'None':
            filename = None
        if filename is not None:
            logger.print(' > Loading Graph from file = <{}>'.format(filename))
            with open(filename, 'rb') as f:
                _params = pickle.load(f)
            graph.set_parameters(_params)
        # hack
        if args['planner_obs_noise'] is not None:
            graph.set_param(-1, args['planner_obs_noise'])  # default 0.95

    episode_success = []
    episode_good = []
    episode_stats = []
    t = 0
    seed = args['seed']
    max_episode_len = args['max_episode_len']
    plan_req = args['plan_dist_iters'] if 'plan_dist_iters' in args else None

    ####################
    accu_plan_time = 0
    accu_exe_time = 0
    accu_mask_time = 0
    ####################

    for it in range(args['max_iters']):
        if (it > 0) and (backup_rate > 0) and (it % backup_rate == 0) and (data_saver is not None):
            data_saver.save(episode_stats, ep_id=it)
        cur_infos = []
        motion.reset()
        set_seed(seed + it + 1)  # reset seed
        if plan_req is not None:
            while True:
                task.reset(target=fixed_target)
                m = len(task.get_optimal_plan())
                if (m in plan_req) and plan_req[m] > 0:
                    break
            plan_req[m] -= 1
        else:
            task.reset(target=fixed_target)
        info = task.info

        episode_success.append(0)
        episode_good.append(0)
        task_target = task.get_current_target()
        cur_stats = dict(best_dist=info['dist'],
                         success=0, good=0, reward=0,
                         target=task_target, plan=[],
                         meters=task.info['meters'],
                         optstep=task.info['optsteps'],
                         length=max_episode_len, images=None)
        if hasattr(task.house, "_id"):
            cur_stats['world_id'] = task.house._id

        store_history = args['store_history']
        if store_history:
            cur_infos.append(proc_info(task.info))

        episode_step = 0
        # reset planner
        if graph is not None:
            graph.reset()
        while episode_step < max_episode_len:
            if flag_interrupt and motion.is_interrupt():
                graph_target = task.get_current_target()
            else:
                # TODO #####################
                tt = time.time()
                mask_feat = oracle_func.get(task) if oracle_func is not None else task.get_feature_mask()
                accu_mask_time += time.time() - tt
                tt = time.time()
                graph_target = graph.plan(mask_feat, task_target)
                accu_plan_time += time.time() - tt
                ################################
            graph_target_id = common.target_instruction_dict[graph_target]
            allowed_steps = min(max_episode_len - episode_step, max_motion_steps)

            ###############
            # TODO
            tt = time.time()
            motion_data = motion.run(graph_target, allowed_steps)
            accu_exe_time += time.time() - tt
            cur_stats['plan'].append((graph_target, len(motion_data),
                                      (motion_data[-1][0][graph_target_id] > 0)))

            # store stats
            for dat in motion_data:
                info = dat[4]
                if store_history:
                    cur_infos.append(proc_info(info))
                cur_dist = info['dist']
                if cur_dist == 0:
                    cur_stats['good'] += 1
                    episode_good[-1] = 1
                if cur_dist < cur_stats['best_dist']:
                    cur_stats['best_dist'] = cur_dist

            # update graph
            ## TODO ############
            tt = time.time()
            graph.observe(motion_data, graph_target)
            accu_plan_time += time.time() - tt

            episode_step += len(motion_data)

            # check done
            if motion_data[-1][3]:
                if motion_data[-1][2] > 5:  # magic number
                    episode_success[-1] = 1
                    cur_stats['success'] = 1
                break

        cur_stats['length'] = episode_step  # store length
        if store_history:
            cur_stats['infos'] = cur_infos
        episode_stats.append(cur_stats)

        dur = time.time() - elap
        logger.print('Episode#%d, Elapsed = %.3f min' % (it + 1, dur / 60))
        #TODO #################
        logger.print(' >>> Mask Time = %.4f min' % (accu_mask_time / 60))
        logger.print(' >>> Plan Time = %.4f min' % (accu_plan_time / 60))
        logger.print(' >>> Motion Time = %.4f min' % (accu_exe_time / 60))
        if args['multi_target']:
            logger.print(' ---> Target Room = {}'.format(cur_stats['target']))
        logger.print(' ---> Total Samples = {}'.format(t))
        logger.print(' ---> Success = %d (rate = %.3f)' % (cur_stats['success'], np.mean(episode_success)))
        logger.print(' ---> Times of Reaching Target Room = %d (rate = %.3f)'
                     % (cur_stats['good'], np.mean(episode_good)))
        logger.print(' ---> Best Distance = %d' % cur_stats['best_dist'])
        logger.print(' ---> Birth-place Meters = %.4f (optstep = %d)'
                     % (cur_stats['meters'], cur_stats['optstep']))
        logger.print(' ---> Planner Results = {}'.format(cur_stats['plan']))

    logger.print('######## Final Stats ###########')
    logger.print('Success Rate = %.3f' % np.mean(episode_success))
    logger.print('> Avg Ep-Length per Success = %.3f'
                 % np.mean([s['length'] for s in episode_stats if s['success'] > 0]))
    logger.print('> Avg Birth-Meters per Success = %.3f'
                 % np.mean([s['meters'] for s in episode_stats if s['success'] > 0]))
    logger.print('Reaching Target Rate = %.3f' % np.mean(episode_good))
    logger.print('> Avg Ep-Length per Target Reach = %.3f'
                 % np.mean([s['length'] for s in episode_stats if s['good'] > 0]))
    logger.print('> Avg Birth-Meters per Target Reach = %.3f'
                 % np.mean([s['meters'] for s in episode_stats if s['good'] > 0]))
    if args['multi_target']:
        all_targets = list(set([s['target'] for s in episode_stats]))
        for tar in all_targets:
            n = sum([1.0 for s in episode_stats if s['target'] == tar])
            succ = [float(s['success'] > 0) for s in episode_stats if s['target'] == tar]
            good = [float(s['good'] > 0) for s in episode_stats if s['target'] == tar]
            length = [s['length'] for s in episode_stats if s['target'] == tar]
            meters = [s['meters'] for s in episode_stats if s['target'] == tar]
            good_len = np.mean([l for l, g in zip(length, good) if g > 0.5])
            succ_len = np.mean([l for l, s in zip(length, succ) if s > 0.5])
            good_mts = np.mean([l for l, g in zip(meters, good) if g > 0.5])
            succ_mts = np.mean([l for l, s in zip(meters, succ) if s > 0.5])
            logger.print('>>>>> Multi-Target <%s>: Rate = %.3f (n=%d), Good = %.3f (AvgLen=%.3f; Mts=%.3f), Succ = %.3f (AvgLen=%.3f; Mts=%.3f)'
                         % (tar, n / len(episode_stats), n,
                            np.mean(good), good_len, good_mts,
                            np.mean(succ), succ_len, succ_mts))
    return episode_stats
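# ---------------------------------------------------------------------------
# Usage note: args['planner'] selects the high-level policy evaluated above:
# None/'void' -> VoidPlanner, 'oracle' -> OraclePlanner, 'rnn' -> RNNPlanner
# (weights taken from args['planner_filename'] in its constructor), anything
# else -> GraphPlanner (parameters un-pickled from args['planner_filename']
# when given, observation noise overridden via graph.set_param(-1, ...)).
# Per-episode plans are recorded in cur_stats['plan'] as tuples of
# (sub-target, number of motion steps, motion_data[-1][0][graph_target_id] > 0),
# where the last element presumably indicates whether the sub-target showed up
# in the final feature mask returned by the motion policy.
# ---------------------------------------------------------------------------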