for include_quat in (True, False):
    if not include_joint_angles and not include_quat:
        continue
    # setting experiment name for particular env
    EXPERIMENT_NAME = "{}-{}-{}-{}-{}".format(
        'experiment1', env_name, include_joint_angles, include_quat, date_string)

    for n in range(25):
        for time_constant in time_constants:
            level = get_env_name(
                env_name,
                time_constant,
                include_joint_angles,
                include_quat,
                eval=False,
            )
            eval_level = get_env_name(
                env_name,
                time_constant,
                include_joint_angles,
                include_quat,
                eval=True,
            )
            job_name = run_kubernetes.run(
                command=
                "python3 /root/flexible_robotics/flexible_robotics/agents/coach_script_ddpg.py --experiment_name {experiment_name}"
                .format(experiment_name=EXPERIMENT_NAME),
                environment={
                    "level": level,
                    "eval_level": eval_level
                },
            )
        'consumer_token': consumer_token,
        'root_path': root_path,
        'ashes_renderer': renderer
    }

    api_app = Application(api_routes, resources,
                          middlewares=[MessageMiddleware()] + middlewares,
                          render_factory=render_basic)
    ui_app = Application(ui_routes, resources,
                         middlewares=[MessageMiddleware(use_ashes=True)] + middlewares,
                         render_factory=renderer)
    static_app = StaticApplication(STATIC_PATH)
    root_app = Application([
        StaticFileRoute('/', STATIC_PATH + '/index.html'),
        ('/', static_app),
        ('/', ui_app),
        ('/v1/', api_app),
        ('/meta', MetaApplication())
    ])

    return root_app


env_name = get_env_name()
app = create_app(env_name=env_name)


if __name__ == '__main__':
    app.serve()
def infer(game, representation, infer_kwargs, **kwargs):
    """
    - max_trials: The number of trials per evaluation.
    - infer_kwargs: Args to pass to the environment.
    """
    infer_kwargs = {**infer_kwargs, "inference": True, "render": True}
    max_trials = kwargs.get("max_trials", -1)
    # n = kwargs.get("n", None)
    exp_id = infer_kwargs.get('experiment_id')
    map_width = infer_kwargs.get("map_width")
    env_name = get_env_name(game, representation)
    exp_name = get_exp_name(game, representation, **infer_kwargs)
    # if n is None:
    #     if EXPERIMENT_ID is None:
    #         n = max_exp_idx(exp_name)
    #     else:
    #         n = EXPERIMENT_ID
    #     if n == 0:
    #         raise Exception(
    #             "Did not find ranked saved model of experiment: {}".format(exp_name)
    #         )
    crop_size = infer_kwargs.get("cropped_size")

    if crop_size == -1:
        infer_kwargs["cropped_size"] = get_crop_size(game)
    # log_dir = "{}/{}_{}_log".format(EXPERIMENT_DIR, exp_name, n)
    log_dir = "{}/{}_{}_log".format(EXPERIMENT_DIR, exp_name, exp_id)
    # no log dir, 1 parallel environment
    n_cpu = infer_kwargs.get("n_cpu")
    env, dummy_action_space, n_tools = make_vec_envs(
        env_name, representation, None, **infer_kwargs
    )
    print("loading model at {}".format(log_dir))
    model = load_model(
        log_dir, load_best=infer_kwargs.get("load_best"), n_tools=n_tools
    )

    if model is None:
        raise Exception("No model loaded")
    # model.set_env(env)
    env.action_space = dummy_action_space
    obs = env.reset()
    # Record final values of each trial
    # if 'binary' in env_name:
    #     path_lengths = []
    #     changes = []
    #     regions = []
    #     infer_info = {
    #         'path_lengths': [],
    #         'changes': [],
    #         'regions': [],
    #     }
    n_trials = 0
    n_step = 0

    while n_trials != max_trials:
        # action = get_action(obs, env, model)
        action, _ = model.predict(obs)
        obs, rewards, dones, info = env.step(action)
        # print('reward: {}'.format(rewards))
        # reward = rewards[0]
        # n_regions = info[0]['regions']
        # readouts = []
        # if 'binary' in env_name:
        #     curr_path_length = info[0]['path-length']
        #     readouts.append('path length: {}'.format(curr_path_length))
        #     path_lengths.append(curr_path_length)
        #     changes.append(info[0]['changes'])
        #     regions.append(info[0]['regions'])
        # readouts += ['regions: {}'.format(n_regions), 'reward: {}'.format(reward)]
        # stringexec = ""
        # m = 0
        # y0, dy = 50, 40
        # img = np.zeros((256, 512, 3), np.uint8)
        # scale_percent = 60  # percent of original size
        # width = int(img.shape[1] * scale_percent / 100)
        # height = int(img.shape[0] * scale_percent / 100)
        # dim = (width, height)
        # # resize image
        # for i, line in enumerate(readouts):
        #     y = y0 + i * dy
        #     cv2.putText(img, line, (50, y), font, fontScale, fontColor, lineType)
        #     # stringexec = "cv2.putText(img, TextList[" + str(TextList.index(i)) + "], (100, 100+" + str(m) + "), cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 100, 100), 1, cv2.LINE_AA)\n"
        #     # m += 100
        # # cv2.putText(
        # #     img, readout,
        # #     topLeftCornerOfText,
        # #     font,
        # #     fontScale,
        # #     fontColor,
        # #     lineType)
        # # Display the image
        # resized = cv2.resize(img, dim, interpolation=cv2.INTER_AREA)
        # cv2.imshow("img", resized)
        # cv2.waitKey(1)
        # # for p, v in model.get_parameters().items():
        # #     print(p, v.shape)
        n_step += 1

        if dones:
            env.reset()
            # # show_state(env, path_lengths, changes, regions, n_step)
            # if 'binary' in env_name:
            #     infer_info['path_lengths'] = path_lengths[-1]
            #     infer_info['changes'] = changes[-1]
            #     infer_info['regions'] = regions[-1]
            n_step = 0
            n_trials += 1
def main(game, representation, n_frames, n_cpu, render, logging, **kwargs):
    if game not in [
            "binary_ctrl", "sokoban_ctrl", "zelda_ctrl", "smb_ctrl",
            "MicropolisEnv", "RCT"
    ]:
        raise Exception(
            "Not a controllable environment. Maybe add '_ctrl' to the end of the name? E.g. 'sokoban_ctrl'"
        )
    kwargs['n_cpu'] = n_cpu
    env_name = get_env_name(game, representation)
    print('env name: ', env_name)
    exp_name = get_exp_name(game, representation, **kwargs)
    resume = kwargs.get('resume', False)
    ca_action = kwargs.get('ca_action')

    if representation == 'wide' and not ('RCT' in game or 'Micropolis' in game):
        if ca_action:
            raise Exception()
            # policy = CApolicy
        else:
            policy = FullyConvPolicyBigMap
            # policy = WidePolicy

        if game == "sokoban" or game == "sokoban_ctrl":
            # T()
            policy = FullyConvPolicySmallMap
    else:
        # policy = ActorCriticCnnPolicy
        policy = CustomPolicyBigMap

        if game == "sokoban" or game == "sokoban_ctrl":
            # T()
            policy = CustomPolicySmallMap
    crop_size = kwargs.get('cropped_size')

    if crop_size == -1:
        kwargs['cropped_size'] = get_crop_size(game)
    exp_id = kwargs.get('experiment_id')
    # n = kwargs.get('experiment_id')
    # if n is None:
    #     n = max_exp_idx(exp_name)
    # if not resume:
    #     n += 1
    global log_dir
    exp_name_id = '{}_{}'.format(exp_name, exp_id)
    # log_dir = 'rl_runs/{}_{}_log'.format(exp_name, n)
    log_dir = 'rl_runs/{}_log'.format(exp_name_id)

    kwargs = {
        **kwargs,
        'render_rank': 0,
        'render': render,
    }
    # if not resume:
    try:
        os.mkdir(log_dir)
        print("Log directory does not exist, starting anew, bb.")
        resume = False
    except Exception:
        print("Log directory exists, fumbling on. Will try to load model.")

    try:
        env, dummy_action_space, n_tools = make_vec_envs(
            env_name, representation, log_dir, **kwargs)
    except Exception as e:
        # if this is a new experiment, clean up the logging directory if we fail to start up
        # if not resume:
        #     os.rmdir(log_dir)
        raise e

    with open(os.path.join(log_dir, 'settings.json'), 'w', encoding='utf-8') as f:
        json.dump(kwargs, f, ensure_ascii=False, indent=4)
        # pass

    if resume:
        model = load_model(log_dir, n_tools=n_tools)

    if representation == 'wide':
        # policy_kwargs = {'n_tools': n_tools}
        policy_kwargs = {}

        if ca_action:
            # FIXME: there should be a better way hahahaha
            env.action_space = dummy_action_space
            # more frequent updates, for debugging... or because our action space is huge?
            # n_steps = 512
        else:
            pass
            # n_steps = 2048
    else:
        policy_kwargs = {}
        # the default for SB3 PPO
        # n_steps = 2048

    if not resume or model is None:
        # model = PPO(policy, env, verbose=1, n_steps=n_steps,
        #             tensorboard_log="./runs", policy_kwargs=policy_kwargs)
        model = PPO2(policy, env, verbose=1, tensorboard_log="./rl_runs",
                     policy_kwargs=policy_kwargs)
    # else:
    model.set_env(env)
    # model.policy = model.policy.to('cuda:0')
    # if torch.cuda.is_available():
    #     model.policy = model.policy.cuda()
    tb_log_name = '{}_tb'.format(exp_name_id)

    if not logging:
        model.learn(total_timesteps=n_frames, tb_log_name=tb_log_name)
    else:
        model.learn(total_timesteps=n_frames, tb_log_name=tb_log_name,
                    callback=callback)
def extract_settings_from_options(self, options):
    settings = {}
    settings['ENV'] = options.get('env') or utils.get_env_name()

    if not settings['ENV']:
        raise InstallError(
            'Could not extract env name from path and none specified.'
        )

    pyfile = settings['ENV'] + '.py'

    if not os.path.isfile(utils.get_settings_file(pyfile)):
        warnings.warn(
            'Could not find settings file for env named {}'.format(pyfile),
        )

    settings['DEBUG'] = (
        settings['ENV'] == 'development' or options.get('DEBUG')
    )
    settings['INSTANCE_NAME'] = utils.get_default(
        options,
        'instance_name',
        utils.get_instance_name(),
    )

    if not settings['INSTANCE_NAME']:
        raise InstallError(
            'Could not extract instance name from path and none specified.'
        )

    settings['SECRET_KEY'] = utils.get_default(
        options,
        'secret_key',
        utils.generate_secret_key(options.get('secret_key_length'))
    )
    settings['DATABASE_NAME'] = utils.get_default(
        options,
        'db_name',
        settings['INSTANCE_NAME'],
    )
    settings['DATABASE_USER'] = utils.get_default(
        options,
        'db_user',
        settings['DATABASE_NAME'],
    )
    settings['DATABASE_PASSWORD'] = utils.get_default(
        options,
        'db_password',
        get_password('Please enter the database user password: '),
    )
    settings['DATABASE_HOST'] = utils.get_default(options, 'db_host', None)
    settings['DATABASE_PORT'] = utils.get_default(options, 'db_port', None)
    settings['SITE_DOMAIN_NAME'] = utils.get_default(
        options,
        'domain',
        input('Enter in the domain name for this project instance: '),
    )
    settings['SUBDOMAINS'] = []
    settings['ADDITIONAL_SETTINGS_FILES'] = utils.get_default(
        options,
        'additional_settings_file',
        [],
    )

    return settings
TAIL_LOGS = False
envs = ("FlexibleReacher", "FlexibleReacherStay", "FlexibleStriker", "FlexibleThrower")
date_string = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")

for train_env_name, test_env_name in itertools.product(envs, repeat=2):
    for sensor_selection in ("withIMU", "withoutIMU"):
        # setting experiment name for particular env
        EXPERIMENT_NAME = "experiment3-{}-{}-{}-{}".format(
            train_env_name, test_env_name, sensor_selection, date_string)

        for n in range(10):
            for time_constant in time_constants:
                level = get_env_name(train_env_name, time_constant,
                                     sensor_selection, eval=False)
                eval_level = get_env_name(test_env_name, time_constant,
                                          sensor_selection, eval=True)
                job_name = run_kubernetes.run(
                    command=
                    "python3 /root/flexible_robotics/flexible_robotics/agents/coach_script_ddpg.py --experiment_name {experiment_name}"
                    .format(experiment_name=EXPERIMENT_NAME),
                    environment={
                        "level": level,
                        "eval_level": eval_level
                    },
                )
def evaluate(game, representation, infer_kwargs, fix_trgs=False, **kwargs):
    """
    - max_trials: The number of trials per evaluation.
    - infer_kwargs: Args to pass to the environment.
    """
    global N_BINS
    global N_MAPS
    global N_TRIALS
    infer_kwargs = {**infer_kwargs, "inference": True, "evaluate": True}
    # max_trials = kwargs.get("max_trials", -1)
    # n = kwargs.get("n", None)
    exp_id = infer_kwargs.get('experiment_id')
    # map_width = infer_kwargs.get("map_width")
    max_steps = infer_kwargs.get("max_step")
    eval_controls = infer_kwargs.get("eval_controls")
    env_name = get_env_name(game, representation)
    # env_name = '{}-{}-v0'.format(game, representation)
    exp_name = get_exp_name(game, representation, **kwargs)
    levels_im_name = "{}_{}-bins_levels.png"
    # if n is None:
    #     if EXPERIMENT_ID is None:
    #         n = max_exp_idx(exp_name)
    #         print(
    #             "Experiment index not specified, setting index automatically to {}".format(n)
    #         )
    #     else:
    #         n = EXPERIMENT_ID
    # if n == 0:
    #     raise Exception(
    #         "Did not find ranked saved model of experiment: {}".format(exp_name)
    #     )
    crop_size = infer_kwargs.get("cropped_size")

    if crop_size == -1:
        infer_kwargs["cropped_size"] = get_crop_size(game)
    log_dir = os.path.join(EXPERIMENT_DIR, '{}_{}_log'.format(exp_name, exp_id))
    eval_dir = os.path.join(log_dir, 'eval')

    if not os.path.isdir(eval_dir):
        os.mkdir(eval_dir)
    # log_dir = "{}/{}_{}_log".format(EXPERIMENT_DIR, exp_name, exp_id)
    data_path = os.path.join(eval_dir, "{}_eval_data".format(N_BINS))
    data_path_levels = os.path.join(eval_dir, "{}_eval_data_levels".format(N_BINS))

    if fix_trgs:
        data_path += "_fixTrgs"
        data_path_levels += "_fixTrgs"
    data_path += ".pkl"
    data_path_levels += ".pkl"

    if VIS_ONLY:
        # if RENDER_LEVELS:
        #     eval_data_levels = pickle.load(open(data_path_levels, "rb"))
        #     eval_data_levels.render_levels()
        #     return
        eval_data = pickle.load(open(data_path, "rb"))
        # FIXME: just for backward compatibility
        eval_data.eval_dir = eval_dir
        eval_data.render_levels()
        eval_data.visualize_data(eval_dir, fix_trgs)
        # eval_data.hamming_heatmap(None, eval_data.div_scores)

        return

    # no log dir, 1 parallel environment
    n_cpu = infer_kwargs.get("n_cpu")
    infer_kwargs['render_path'] = True
    env, dummy_action_space, n_tools = make_vec_envs(env_name, representation,
                                                     None, **infer_kwargs)
    model = load_model(log_dir, load_best=infer_kwargs.get("load_best"),
                       n_tools=n_tools)
    # model.set_env(env)
    env.action_space = dummy_action_space
    env = env.envs[0]
    # Record final values of each trial
    # if 'binary' in env_name:
    #     path_lengths = []
    #     changes = []
    #     regions = []
    #     infer_info = {
    #         'path_lengths': [],
    #         'changes': [],
    #         'regions': [],
    #     }

    if n_cpu == 1:
        # control_bounds = env.envs[0].get_control_bounds()
        control_bounds = env.get_control_bounds()
    elif n_cpu > 1:
        raise Exception("no homie, no")
        # supply args and kwargs
        env.remotes[0].send(("env_method", ("get_control_bounds", [], {})))
        control_bounds = env.remotes[0].recv()

    if not eval_controls:
        eval_controls = control_bounds.keys()

    if len(control_bounds) == 0:
        # Then this is a non-controllable agent.
        # Can't we just do this in all cases though?
        control_bounds = env.cond_bounds
    ctrl_bounds = [(k, control_bounds[k]) for k in eval_controls]

    # if len(ctrl_bounds) == 0 and DIVERSITY_EVAL:
    #     N_MAPS = 100
    #     N_TRIALS = 1

    # Hackish get initial states
    init_states = []

    for i in range(N_MAPS):
        env.reset()
        # TODO: set initial states in either of these domains?
        if not (RCT or SC):
            init_states.append(env.unwrapped._rep._map)
    N_EVALS = N_TRIALS * N_MAPS

    def eval_static_trgs():
        '''Run an evaluation on the default values for all level metrics. For
        both controllable and vanilla agents. The latter's "home turf."'''
        N_BINS = None
        level_images = []
        cell_scores = np.zeros(shape=(1, 1, N_EVALS))
        div_scores = np.zeros(shape=(1, 1))
        cell_static_scores = np.zeros(shape=(1, 1, N_EVALS))
        cell_ctrl_scores = np.zeros(shape=(1, 1, N_EVALS))
        level_tokens = None
        # if DIVERSITY_EVAL:
        #     n_row = 1
        #     n_col = 1
        # else:
        n_row = 2
        n_col = 5

        for i in range(n_row):
            level_images_y = []

            for j in range(n_col):
                net_score, ctrl_score, static_score, level_image, tokens = eval_episodes(
                    model,
                    env,
                    N_EVALS,
                    n_cpu,
                    init_states,
                    eval_dir,
                    env.unwrapped._prob.static_trgs,
                    max_steps,
                )
                level_images_y.append(level_image)
                cell_scores[0, 0, :] = net_score
                div_score = np.sum(
                    [np.sum(a != b) for a in tokens for b in tokens]) / (
                        len(tokens) * (len(tokens) - 1))
                div_score = div_score / (map_width * map_width)
                div_scores[0, 0] = div_score
            level_images.append(np.hstack(level_images_y))

        image = np.vstack(level_images[::-1])
        image = Image.fromarray(image)
        image.save(
            os.path.join(eval_dir, levels_im_name.format(ctrl_names, N_BINS)))

        return cell_scores, cell_static_scores, cell_ctrl_scores, div_scores, level_tokens, image

    if len(ctrl_bounds) == 0:
        # If we didn't train with controls, we'll evaluate inside a grid of targets
        # (on the controllable agents' turf) and record scores for the cell
        # corresponding to the default static targets (on the vanilla turf),
        # depending on the value of fix_trgs.
        ctrl_names = prob_cond_metrics[problem]
        ctrl_bounds = [(k, env.cond_bounds[k]) for k in ctrl_names]

    if fix_trgs:
        ctrl_names = None
        ctrl_ranges = None
        cell_scores, cell_static_scores, cell_ctrl_scores, div_scores, level_tokens, image = eval_static_trgs()

    elif len(ctrl_bounds) == 1:
        ctrl_name = ctrl_bounds[0][0]
        bounds = ctrl_bounds[0][1]
        step_size = max((bounds[1] - bounds[0]) / (N_BINS[0] - 1), 1)
        eval_trgs = np.arange(bounds[0], bounds[1] + 1, step_size)
        level_images = []
        cell_scores = np.zeros((len(eval_trgs), 1, N_EVALS))
        cell_ctrl_scores = np.zeros(shape=(len(eval_trgs), 1, N_EVALS))
        cell_static_scores = np.zeros(shape=(len(eval_trgs), 1, N_EVALS))
        level_tokens = []
        div_scores = np.zeros((len(eval_trgs), 1))

        for i, trg in enumerate(eval_trgs):
            trg_dict = {ctrl_name: trg}
            print("evaluating control targets: {}".format(trg_dict))
            # set_ctrl_trgs(env, {ctrl_name: trg})
            net_score, ctrl_score, static_score, level_image, tokens = eval_episodes(
                model, env, N_EVALS, n_cpu, init_states, eval_dir, trg_dict,
                max_steps)
            div_score = div_calc(tokens)
            div_scores[i, 0] = div_score

            if i % LVL_RENDER_INTERVAL == 0:
                level_images.append(level_image)
            cell_scores[i, :, :] = net_score
            cell_ctrl_scores[i, :, :] = ctrl_score
            cell_static_scores[i, :, :] = static_score
            level_tokens.append(tokens)
        ctrl_names = (ctrl_name, None)
        ctrl_ranges = (eval_trgs, None)
        # if "regions" in ctrl_ranges:
        #     # hack it to ensure our default static trgs are in the heatmap,
        #     # so we can compare on baseline's turf
        #     ctrl_ranges["regions"][0] = 1
        ims = np.hstack(level_images)
        image = Image.fromarray(ims)
        image.save(
            os.path.join(eval_dir, levels_im_name.format(ctrl_names, N_BINS)))

    elif len(ctrl_bounds) >= 2:
        ctrl_0, ctrl_1 = ctrl_bounds[0][0], ctrl_bounds[1][0]
        b0, b1 = ctrl_bounds[0][1], ctrl_bounds[1][1]
        step_0 = max((b0[1] - b0[0]) / (N_BINS[0] - 1), 1)
        step_1 = max((b1[1] - b1[0]) / (N_BINS[-1] - 1), 1)
        trgs_0 = np.arange(b0[0], b0[1] + 0.5, step_0)
        trgs_1 = np.arange(b1[0], b1[1] + 0.5, step_1)
        cell_scores = np.zeros(shape=(len(trgs_0), len(trgs_1), N_EVALS))
        div_scores = np.zeros(shape=(len(trgs_0), len(trgs_1)))
        cell_ctrl_scores = np.zeros(shape=(len(trgs_0), len(trgs_1), N_EVALS))
        cell_static_scores = np.zeros(shape=(len(trgs_0), len(trgs_1), N_EVALS))
        level_tokens = [[None] * len(trgs_0)] * len(trgs_1)  # Wait what?
        trg_dict = env.static_trgs
        trg_dict = dict([(k, min(v)) if isinstance(v, tuple) else (k, v)
                         for (k, v) in trg_dict.items()])
        level_images = []

        for i, t0 in enumerate(trgs_0):
            level_images_y = []

            for j, t1 in enumerate(trgs_1):
                ctrl_trg_dict = {ctrl_0: t0, ctrl_1: t1}
                trg_dict.update(ctrl_trg_dict)
                print("evaluating control targets: {}".format(trg_dict))
                # set_ctrl_trgs(env, {ctrl_name: trg})
                net_score, ctrl_score, static_score, level_image, tokens = eval_episodes(
                    model,
                    env,
                    N_EVALS,
                    n_cpu,
                    init_states,
                    eval_dir,
                    trg_dict,
                    max_steps,
                )

                if j % LVL_RENDER_INTERVAL == 0:
                    level_images_y.append(level_image)
                cell_scores[i, j, :] = net_score
                cell_ctrl_scores[i, j, :] = ctrl_score
                cell_static_scores[i, j, :] = static_score
                div_score = div_calc(tokens)
                div_scores[i, j] = div_score
                # level_tokens[j][i] = tokens

            if i % LVL_RENDER_INTERVAL == 0:
                level_images.append(np.hstack(level_images_y))
            # level_tokens.append(tokens)
        ctrl_names = (ctrl_0, ctrl_1)
        ctrl_ranges = (trgs_0, trgs_1)
        image = None
        image = np.vstack(level_images[::-1])
        image = Image.fromarray(image)
        image.save(
            os.path.join(eval_dir, levels_im_name.format(ctrl_names, N_BINS)))

    levels_im_path = os.path.join(eval_dir,
                                  levels_im_name.format(ctrl_names, N_BINS))
    eval_data = EvalData(
        ctrl_names,
        ctrl_ranges,
        cell_scores,
        cell_ctrl_scores,
        cell_static_scores,
        div_scores=div_scores,
        eval_dir=eval_dir,
        levels_image=image,
        levels_im_path=levels_im_path,
    )
    pickle.dump(eval_data, open(data_path, "wb"))
    eval_data.visualize_data(eval_dir, fix_trgs)
    # else:
    #     levels_im_path = os.path.join(
    #         eval_dir, levels_im_name.format(ctrl_names, N_BINS)
    #     )
    #     eval_data_levels = EvalData(
    #         ctrl_names,
    #         ctrl_ranges,
    #         cell_scores,
    #         cell_ctrl_scores,
    #         cell_static_scores,
    #         div_scores=div_scores,
    #         eval_dir=eval_dir,
    #         levels_image=image,
    #         levels_im_path=levels_im_path,
    #     )
    #     pickle.dump(eval_data_levels, open(data_path_levels, "wb"))

    if not fix_trgs:
        eval_data.render_levels()

    if DIVERSITY_EVAL:
        # eval_data = eval_data
        if fix_trgs:
            eval_data.save_stats(div_scores=div_scores, fix_trgs=fix_trgs)
        else:
            pass
            # eval_data.hamming_heatmap(level_tokens, div_scores=div_scores)
    env.close()