# We first choose a model
model = SAC(obs_dim=obs_dim,
            n_actions=n_actions,
            act_lim=act_lim,
            save_dir=model_dir,
            discount=gamma,
            lr=lr,
            seed=seed,
            polyak_coef=polyak_coef,
            temperature=temperature,
            hidden_layers=hidden_layers,
            n_hidden_units=n_hidden_units,
            env=train_env)

# Now we create an Agent to train / test the model
agent = Agent(model=model,
              replay_buffer=replay_buffer,
              train_env=train_env,
              test_env=test_env,
              replay_start_size=replay_start_size,
              n_episodes=n_episodes,
              batch_size=batch_size,
              n_actions=n_actions)

if train:
    # Train the model with the agent
    agent.train()
else:
    # Test an existing model
    agent.test(model_path)
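# A minimal sketch of how the SAC constructor arguments above could be derived,
# assuming `train_env` is a Gym-style environment with continuous (Box) observation
# and action spaces. The environment name and values below are illustrative
# assumptions, not the repository's actual configuration.
import gym

train_env = gym.make('Pendulum-v1')              # hypothetical choice of environment
test_env = gym.make('Pendulum-v1')
obs_dim = train_env.observation_space.shape[0]   # size of the observation vector
n_actions = train_env.action_space.shape[0]      # number of continuous action dimensions
act_lim = float(train_env.action_space.high[0])  # symmetric action bound used to scale policy outputs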
def run():
    if sys.platform.startswith('win'):
        import win32api
        import win32con
        import _thread

        def _win_handler(event, hook_sigint=_thread.interrupt_main):
            if event == 0:
                hook_sigint()
                return 1
            return 0

        # Add the _win_handler function to the Windows console's handler function list
        win32api.SetConsoleCtrlHandler(_win_handler, 1)

    options = docopt(__doc__)
    options = get_options(dict(options))
    print(options)
    default_config = load_yaml('config.yaml')

    # gym > unity > unity_env
    model_args = Config(**default_config['model'])
    train_args = Config(**default_config['train'])
    env_args = Config()
    buffer_args = Config(**default_config['buffer'])

    model_args.algo = options.algo
    model_args.use_rnn = options.use_rnn
    model_args.algo_config = options.algo_config
    model_args.seed = options.seed
    model_args.load = options.load

    env_args.env_num = options.n_copys
    if options.gym:
        train_args.add_dict(default_config['gym']['train'])
        train_args.update({'render_episode': options.render_episode})
        env_args.add_dict(default_config['gym']['env'])
        env_args.type = 'gym'
        env_args.env_name = options.gym_env
        env_args.env_seed = options.gym_env_seed
    else:
        train_args.add_dict(default_config['unity']['train'])
        env_args.add_dict(default_config['unity']['env'])
        env_args.type = 'unity'
        env_args.port = options.port
        env_args.sampler_path = options.sampler
        env_args.env_seed = options.unity_env_seed
        if options.unity:
            env_args.file_path = None
            env_args.env_name = 'unity'
        else:
            env_args.update({'file_path': options.env})
            if os.path.exists(env_args.file_path):
                env_args.env_name = options.unity_env or os.path.join(
                    *os.path.split(env_args.file_path)[0].replace('\\', '/').replace(r'//', r'/').split('/')[-2:]
                )
                if 'visual' in env_args.env_name.lower():
                    # If training with visual input while not rendering the environment,
                    # all-zero observations would be passed, so force rendering on.
                    options.graphic = True
            else:
                raise Exception('can not find this file.')

    if options.inference:
        env_args.train_mode = False
        env_args.render = True
    else:
        env_args.train_mode = True
        env_args.render = options.graphic

    train_args.index = 0
    train_args.name = NAME
    train_args.use_wandb = options.use_wandb
    train_args.inference = options.inference
    train_args.prefill_choose = options.prefill_choose
    train_args.base_dir = os.path.join(options.store_dir or BASE_DIR, env_args.env_name, model_args.algo)
    train_args.update(
        dict([
            ['name', options.name],
            ['max_step_per_episode', options.max_step_per_episode],
            ['max_train_step', options.max_train_step],
            ['max_train_frame', options.max_train_frame],
            ['max_train_episode', options.max_train_episode],
            ['save_frequency', options.save_frequency],
            ['pre_fill_steps', options.prefill_steps],
            ['info', options.info]
        ])
    )

    if options.inference:
        Agent(env_args, model_args, buffer_args, train_args).evaluate()
        return

    trials = options.models
    if trials == 1:
        agent_run(env_args, model_args, buffer_args, train_args)
    elif trials > 1:
        processes = []
        for i in range(trials):
            _env_args = deepcopy(env_args)
            _model_args = deepcopy(model_args)
            _model_args.seed += i * 10
            _buffer_args = deepcopy(buffer_args)
            _train_args = deepcopy(train_args)
            _train_args.index = i
            if _env_args.type == 'unity':
                _env_args.port = env_args.port + i
            p = Process(target=agent_run, args=(_env_args, _model_args, _buffer_args, _train_args))
            p.start()
            time.sleep(10)
            processes.append(p)
        for p in processes:
            p.join()
    else:
        raise Exception('trials must be greater than 0.')
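# run() above relies on a small `Config` helper that exposes dict entries as
# attributes and supports `add_dict` / `update`. Its real implementation is not shown
# in this snippet; the following is only a plausible sketch consistent with how it is
# used above (Config(**d), cfg.add_dict(d), cfg.update(d), cfg.field = value).
class Config:
    def __init__(self, **kwargs):
        self.add_dict(kwargs)

    def add_dict(self, d):
        # Merge a plain dict into the config, exposing each key as an attribute.
        for k, v in d.items():
            setattr(self, k, v)

    def update(self, d):
        # Same merge semantics as add_dict; kept separate to mirror the call sites above.
        self.add_dict(d)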
def main():
    logging.basicConfig(level=logging.DEBUG, format="[%(asctime)s %(filename)s] %(message)s")

    with open('params.json') as json_file:
        config = json.loads(json_file.read())

    args = __create_args()
    config = __adjust_config(args, config)

    # Loading the dataset
    train_loader, valid_loader = __load_data(config)

    # Creating the model
    model = NN(config['model']['architecture'], is_maskable=True)
    model = model.to(config['device'])
    initial_mask = model.masks

    # Getting the criterion and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = get_optimizer(config, model)

    #########################
    # Agent and Environment #
    #########################
    ACTIONS = create_environment(model.masks, config['environment_protocol'])
    random.seed(42)
    shuffle(ACTIONS)

    N_STATES = len(ACTIONS)
    N_EPISODES = config['mdp']['N_EPISODES']
    MAX_STEPS_PER_EPISODE = config['mdp']['MAX_STEPS_PER_EPISODES']
    MIN_ALPHA = config['mdp']['MIN_ALPHA']
    GAMMA = config['mdp']['GAMMA']
    alphas = np.linspace(1.0, MIN_ALPHA, N_EPISODES)
    q_table = dict()
    start_state = State(model.masks, ACTIONS)

    ##########################
    # Create sub_working_dir #
    ##########################
    sub_working_dir = '{}/results/{}/{}/{}/{}'.format(
        config['working_dir'],
        config['model']['name'],
        '_' + config['mdp']['Q_COMPUTATION'],
        '{}_{}_{}/{}_{}'.format(
            time.strftime("%d", time.localtime()),
            time.strftime("%m", time.localtime()),
            time.strftime("%Y", time.localtime()),
            time.strftime("%H", time.localtime()),
            time.strftime("%M", time.localtime())),
        'ALPHA_SEARCH__MIN_ALPHA-{}__GAMMA-{}__PRUNE_TYPE-{}__PRUNE_PERCENT-{}__EPSILON-{}__REWARD_TYPE-{}'.format(
            MIN_ALPHA,
            GAMMA if config['mdp']['Q_COMPUTATION'] != 'QL_M' else 'None',
            config['environment_protocol'],
            config['agent']['prune_percentage'],
            config['agent']['epsilon'],
            config['agent']['reward_type']))

    if not os.path.exists(sub_working_dir):
        os.makedirs(sub_working_dir)
    config["sub_working_dir"] = sub_working_dir
    logging.info("sub working dir: %s" % sub_working_dir)

    ###############
    # Begin Train #
    ###############
    train(model, train_loader, valid_loader, criterion, optimizer,
          config['train']['epochs'], config['train']['print_every'], config['device'])

    loss, accuracy = validation(model, valid_loader, criterion)
    logging.info('Validation Loss performed: {}\tValidation Accuracy performed: {}'.format(loss, accuracy))

    if config['agent']['reward_type'] == 'ACCURACY':
        start_state.last_reward = -(1. - accuracy)
    elif config['agent']['reward_type'] == 'LOSS':
        start_state.last_reward = -loss
    elif config['agent']['reward_type'] == 'ACC_COMPRESSION':
        start_state.last_reward = -(1. - accuracy)
    elif config['agent']['reward_type'] == 'MY_RCRA':
        start_state.last_reward = -(1. - accuracy)

    #########
    # Prune #
    #########
    for e in range(N_EPISODES):
        state = deepcopy(start_state)
        total_reward = 0.0
        ALPHA = alphas[e]
        agent = Agent(config, ACTIONS, model, valid_loader, criterion)

        for i in range(MAX_STEPS_PER_EPISODE):
            action = agent.choose_action(q_table, state)
            next_state, reward, done = agent.act(state, action)
            total_reward += reward

            if config['mdp']['Q_COMPUTATION'] == 'QL_M':
                # Q-Learning update as given by Ghallab, Nau and Traverso
                q_value(q_table, state)[action] = q_value(q_table, state, action) + \
                    ALPHA * (reward + np.max(q_value(q_table, next_state)) - q_value(q_table, state, action))
            elif config['mdp']['Q_COMPUTATION'] == 'QL_WIKI':
                # Q-Learning update as given on Wikipedia
                q_value(q_table, state)[action] = (1. - ALPHA) * q_value(q_table, state, action) + \
                    ALPHA * (reward + GAMMA * np.max(q_value(q_table, next_state)))

            del state
            state = next_state

            if done:
                break

        logging.info("Episode {}: reward type {}: total reward -> {}".format(
            e + 1, config['agent']['reward_type'], total_reward))

    #####################
    # Save the solution #
    #####################
    q_table_saver(q_table, config['sub_working_dir'], '/q_table.tsv')

    agent = Agent(config, ACTIONS, model, valid_loader, criterion)
    my_state = start_state
    result = []
    done = False

    while not done:
        sa = q_value(q_table, my_state)
        my_action = np.argmax(sa)
        action = my_state.environment[my_action]
        my_state, reward, done = agent.act(my_state, my_action)
        result.append([action, reward])

    final = pd.DataFrame(result, columns=['Action', 'Reward'])
    final.to_csv(config['sub_working_dir'] + '/actions_to_prune.tsv', sep='\t', index=False)
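# The Q-learning updates in main() go through a `q_value` helper whose definition is
# not part of this snippet. A minimal sketch consistent with its usage above (lazily
# creating a zero-initialised action-value row per state, returning either the whole
# row or a single entry) might look like this; the reliance on hashable states with an
# `environment` attribute is an assumption taken from the calling code.
import numpy as np

def q_value(q_table, state, action=None):
    # Lazily initialise one action-value row per unseen state.
    if state not in q_table:
        q_table[state] = np.zeros(len(state.environment))
    if action is None:
        return q_table[state]          # full row of action values for this state
    return q_table[state][action]      # single Q(s, a) entry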
def agent_run(*args):
    # Build an Agent from the given argument bundles and call it to start a run.
    Agent(*args)()
def run():
    if sys.platform.startswith('win'):
        import win32api
        import win32con
        import _thread

        def _win_handler(event, hook_sigint=_thread.interrupt_main):
            if event == 0:
                hook_sigint()
                return 1
            return 0

        # Add the _win_handler function to the Windows console's handler function list
        win32api.SetConsoleCtrlHandler(_win_handler, 1)

    options = docopt(__doc__)
    print(options)
    default_config = load_yaml('config.yaml')

    # gym > unity > unity_env
    env_args, model_args, train_args = {}, {}, {}
    unity_args, gym_args, buffer_args = default_config['unity'], default_config['gym'], default_config['buffer']

    model_args['algo'] = str(options['--algorithm'])
    model_args['algo_config'] = None if options['--config-file'] == 'None' else str(options['--config-file'])
    model_args['seed'] = int(options['--seed'])
    model_args['load'] = None if options['--load'] == 'None' else str(options['--load'])
    model_args['logger2file'] = default_config['logger2file']

    train_args['index'] = 0
    train_args['all_learner_print'] = default_config['all_learner_print']
    train_args['name'] = NAME if options['--name'] == 'None' else str(options['--name'])
    train_args['max_step'] = default_config['max_step'] if options['--max-step'] == 'None' else int(options['--max-step'])
    train_args['max_episode'] = default_config['max_episode'] if options['--max-episode'] == 'None' else int(options['--max-episode'])
    train_args['save_frequency'] = default_config['save_frequency'] if options['--save-frequency'] == 'None' else int(options['--save-frequency'])
    train_args['inference'] = bool(options['--inference'])
    train_args['fill_in'] = bool(options['--fill-in'])
    train_args['no_op_choose'] = bool(options['--noop-choose'])
    train_args['info'] = default_config['info'] if options['--info'] == 'None' else str(options['--info'])

    if options['--gym']:
        env_args['type'] = 'gym'
        env_args['env_name'] = str(options['--gym-env'])
        env_args['env_num'] = int(options['--gym-agents'])
        env_args['env_seed'] = int(options['--gym-env-seed'])
        env_args['render_mode'] = gym_args['render_mode']
        env_args['action_skip'] = gym_args['action_skip']
        env_args['skip'] = gym_args['skip']
        env_args['obs_stack'] = gym_args['obs_stack']
        env_args['stack'] = gym_args['stack']
        env_args['obs_grayscale'] = gym_args['obs_grayscale']
        env_args['obs_resize'] = gym_args['obs_resize']
        env_args['resize'] = gym_args['resize']
        env_args['obs_scale'] = gym_args['obs_scale']
        train_args['render_episode'] = gym_args['render_episode'] if options['--render-episode'] == 'None' else int(options['--render-episode'])
        train_args['no_op_steps'] = gym_args['random_steps']
        train_args['render'] = gym_args['render']
        train_args['eval_while_train'] = gym_args['eval_while_train']
        train_args['max_eval_episode'] = gym_args['max_eval_episode']
    else:
        env_args['type'] = 'unity'
        if options['--unity']:
            env_args['file_path'] = None
            env_args['env_name'] = 'unity'
        else:
            env_args['file_path'] = unity_args['exe_file'] if options['--env'] == 'None' else str(options['--env'])
            if os.path.exists(env_args['file_path']):
                env_args['env_name'] = os.path.join(
                    *os.path.split(env_args['file_path'])[0].replace('\\', '/').replace(r'//', r'/').split('/')[-2:])
            else:
                raise Exception('can not find this file.')
        if bool(options['--inference']):
            env_args['train_mode'] = False
        else:
            env_args['train_mode'] = True
        env_args['port'] = int(options['--port'])
        env_args['render'] = bool(options['--graphic'])
        env_args['sampler_path'] = None if options['--sampler'] == 'None' else str(options['--sampler'])
        env_args['reset_config'] = unity_args['reset_config']
        train_args['no_op_steps'] = unity_args['no_op_steps']

    train_args['base_dir'] = os.path.join(
        BASE_DIR if options['--store-dir'] == 'None' else str(options['--store-dir']),
        env_args['env_name'], model_args['algo'])

    if bool(options['--inference']):
        Agent(env_args, model_args, buffer_args, train_args).evaluate()

    trials = int(options['--modes'])
    if trials == 1:
        agent_run(env_args, model_args, buffer_args, train_args)
    elif trials > 1:
        processes = []
        for i in range(trials):
            _env_args = deepcopy(env_args)
            _model_args = deepcopy(model_args)
            _model_args['seed'] += i * 10
            _buffer_args = deepcopy(buffer_args)
            _train_args = deepcopy(train_args)
            _train_args['index'] = i
            if _env_args['type'] == 'unity':
                _env_args['port'] = env_args['port'] + i
            p = Process(target=agent_run, args=(_env_args, _model_args, _buffer_args, _train_args))
            p.start()
            time.sleep(10)
            processes.append(p)
        for p in processes:
            p.join()
    else:
        raise Exception('trials must be greater than 0.')
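# Both run() variants read their defaults through a `load_yaml` helper whose body is
# not shown here. A minimal sketch using PyYAML, under the assumption that the config
# file is a plain mapping, could look like this.
import yaml

def load_yaml(path):
    # Read a YAML file and return its contents as a plain dict.
    with open(path, 'r', encoding='utf-8') as f:
        return yaml.safe_load(f)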