def main():
    start_timestamp = datetime.datetime.now().isoformat()
    save_dir = os.path.join('trained_models', start_timestamp)
    os.makedirs(save_dir, exist_ok=True)
    print("Saving models to: {}".format(save_dir), file=sys.stderr)

    log_path = os.path.join(save_dir, '0000_log.csv')
    logger = CSVLogger(log_path,
                       ['ep', 'step', 'score', 'aug_score', 'exploration_rate'])

    env = gym.make(ENV_NAME)
    observation_space = env.observation_space.shape[0]
    action_space = env.action_space.n
    dqn_solver = DQNSolver(observation_space, action_space)

    # Per-episode history (was referenced below but never initialized).
    ep_history = {'score': [], 'exploration_rate': []}

    for ep in range(NUM_TRAIN_EPISODES):
        ep_start = time.time()
        state = env.reset()
        state = np.reshape(state, [1, observation_space])
        ep_score = 0
        ep_aug_score = 0
        ep_step = 0
        while True:
            ep_step += 1
            action = dqn_solver.act(state)
            state_next, reward, terminal, info = env.step(action)
            ep_score += reward
            if REWARD_CLOSER_TO_GROUND:
                # ground_reward = max(0, (1 - state_next[1]) * 1)
                # reward += ground_reward
                # reward += min(0, state_next[1] * (-10))
                reward += min(0, state_next[1] * (-5))
            if REWARD_CLOSER_TO_CENTER:
                # reward += min(0, np.abs(state_next[0]) * (-10))
                reward += min(0, np.abs(state_next[0]) * (-5))
            ep_aug_score += reward
            state_next = np.reshape(state_next, [1, observation_space])
            dqn_solver.remember(state, action, reward, state_next, terminal)
            state = state_next
            if terminal:
                ep_duration = time.time() - ep_start
                logger.log(ep, ep_step, ep_score, ep_aug_score,
                           dqn_solver.exploration_rate)
                print(f'Ep: {ep:5}; Length: {ep_step:3}; '
                      f'Duration: {ep_duration:.2f} s '
                      f'({ep_duration/ep_step:.4f} per step); '
                      f'Score: {ep_score:.4f}; '
                      f'Exploration rate: {dqn_solver.exploration_rate:.4f}')
                ep_history['score'].append(ep_score)
                ep_history['exploration_rate'].append(dqn_solver.exploration_rate)
                break
            dqn_solver.experience_replay()
        print(f'{ep} of {NUM_TRAIN_EPISODES} episodes')
        if ep % SAVE_EACH_N_EPS == 0 and ep != 0:
            dqn_solver.model.save(os.path.join(save_dir, f'my_model_EP{ep}.h5'))
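# The snippet above assumes a CSVLogger constructed as CSVLogger(path,
# fieldnames) and written to via logger.log(*values). The original class is
# not shown; the following is a minimal sketch of that assumed interface,
# not the actual implementation.
import csv

class CSVLogger:
    def __init__(self, path, fieldnames):
        self.fieldnames = fieldnames
        # Line-buffered so each row reaches disk as soon as it is logged.
        self.file = open(path, 'w', newline='', buffering=1)
        self.writer = csv.writer(self.file)
        self.writer.writerow(fieldnames)

    def log(self, *values):
        # One positional value per field, in fieldname order.
        assert len(values) == len(self.fieldnames)
        self.writer.writerow(values)

    def close(self):
        self.file.close()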
def test_callback_has_required_properties_after_init(classifier):
    cb = CSVLogger(classifier)
    assert cb.filename
    assert not cb.path.exists()
    assert cb.learn is classifier
    assert cb.file is None
exp_name = '{}-s:{}-optim:{}-lr:{}-T:{}-K:{}-N:{}-sigma:{}-seed:{}'.format(
    args.estimate, args.schedule_type, args.optimizer, args.outer_lr, args.T,
    args.K, args.N, args.sigma, args.seed)

save_dir = os.path.join(args.save_dir, exp_name)
if not os.path.exists(save_dir):
    os.makedirs(save_dir)

# Save command-line arguments
with open(os.path.join(save_dir, 'args.yaml'), 'w') as f:
    yaml.dump(vars(args), f)

iteration_logger = CSVLogger(
    fieldnames=[
        'time_elapsed', 'iteration', 'inner_problem_steps', 'theta0',
        'theta1', 'theta0_grad', 'theta1_grad', 'L'
    ],
    filename=os.path.join(save_dir, 'iteration.csv'))


@jax.jit
def loss(x):
    """Inner loss surface."""
    return (jnp.sqrt(x[0]**2 + 5) - jnp.sqrt(5)
            + jnp.sin(x[1])**2 * jnp.exp(-5 * x[0]**2)
            + 0.25 * jnp.abs(x[1] - 100))


loss_grad = jax.jit(jax.grad(loss))


@jax.jit  # decorator for the next function, truncated in this excerpt
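# Illustrative usage of loss/loss_grad above (not from the source): a few
# steps of plain gradient descent on the inner loss surface.
x = jnp.array([1.0, 1.0])
for _ in range(100):
    x = x - 0.01 * loss_grad(x)
print('x:', x, 'loss:', loss(x))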
def main(): """ """ exp_path = Path.cwd() torch.cuda.empty_cache() # get user to choose training config config_file = "ResNet.json" # Load Test Parameters with open(config_file, "r") as f: x = json.load(f) hyperparams = x["HYPERPARAMETERS"] name = x["model"] n_bands = hyperparams['n_bands'] patch_size = hyperparams['patch_size'] _, path = utils.experiment_path_build(x) c_drive_docs = Path(x["LOGGING"]["log_location"]) log_file = Path(x["LOGGING"]["log_file"]) default_device = torch.device( 'cuda:0' if torch.cuda.is_available() else 'cpu') # Parameters validation_split = hyperparams["validation_split"] test_split = hyperparams["test_split"] random_seed = x["BASIC_PARAMETERS"]["random_seed"] shuffle_dataset = x["LOCATIONS"]["shuffle_dataset"] disp_batch = hyperparams["disp_batch"] # images to display on test epochs = hyperparams["epoch"] bs = hyperparams["batch_size"] n_jobs = x["BASIC_PARAMETERS"]["n_jobs"] lr = hyperparams["learning_rate"] HSI_ds = HSI_Dataset(path) _, classes = utils.get_classes(path) num_classes = len(classes) train_ds_sampler, valid_ds_sampler, test_ds_sampler = train_test_valid_split( HSI_ds, shuffle_dataset, validation_split, test_split, random_seed) phases = make_phases(HSI_ds, train_ds_sampler, valid_ds_sampler, test_ds_sampler, bs=32, n_jobs=4, disp_batch=disp_batch) model = Net(ResidualBlock, [2, 4, 8], in_channels=n_bands, num_classes=num_classes).to(default_device) # Lee Model Call # model = Net(n_bands, n_classes, patch_size) opt = optim.Adam(model.parameters(), lr=1e-2) training_params = {'Dataset Path': path, 'Experiment Path': exp_path, 'number of classes': num_classes, 'number of bands': n_bands, 'patch size': patch_size, 'test_train Split': { 'validation split': validation_split, 'shuffle dataset': shuffle_dataset, 'random seed': random_seed}, 'Batch Size': bs, "Number of Jobs": n_jobs, "Learning Rate": lr, "Scheduler": "One Cycle Schedule" } cb = CallbacksGroup([ RollingLoss(), Accuracy(), Scheduler( OneCycleSchedule(t=len(phases[0].loader) * epochs), mode='batch' ), StreamLogger(), CSVLogger(c_drive_docs.joinpath(log_file), training_params), ProgressBar(), save_model(c_drive_docs, name), test_images(path=c_drive_docs, batch_size=5, classes=classes) ]) # save_model(exp_path, name) train(model, opt, phases, cb, epochs=epochs, device=default_device, loss_fn=F.cross_entropy) lr_history = pd.DataFrame(cb['scheduler'].parameter_history('lr')) ax = lr_history.plot(figsize=(8, 6)) ax.set_xlabel('Training Batch Index') ax.set_ylabel('Learning Rate') fig = ax.get_figure() file_loc = [str(c_drive_docs) + "\\checkpoints\\lr-test.jpg"] s = "" s = s.join(file_loc) conf_path = Path(s) fig.savefig(conf_path)
args = parser.parse_args()

random.seed(args.seed)
onp.random.seed(args.seed)

exp_name = '{}-{}-lr:{}-sigma:{}-N:{}-T:{}-K:{}-n:{}-c:{}-d:{}'.format(
    args.estimate, args.env_name, args.lr, args.noise, args.N, args.horizon,
    args.K, int(args.normalize_state), int(args.clip_rewards),
    int(args.divide_by_variance))

save_dir = os.path.join(args.save_dir, exp_name, 'seed_{}'.format(args.seed))
if not os.path.exists(save_dir):
    os.makedirs(save_dir)

iteration_logger = CSVLogger(
    fieldnames=['time', 'iteration', 'total_steps', 'reward',
                'theta_grad_norm'],
    filename=os.path.join(save_dir, 'iteration.csv'))

total_count = 0


class Normalizer():
    def __init__(self, num_inputs):
        self.n = onp.zeros(num_inputs)
        self.mean = onp.zeros(num_inputs)
        self.mean_diff = onp.zeros(num_inputs)
        self.var = onp.zeros(num_inputs)

    def observe(self, x):
        self.n += 1.
        last_mean = self.mean.copy()
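        # Assumed completion (the excerpt ends at last_mean): Welford-style
        # running statistics, as in ARS-style state normalizers; these lines
        # are not from the source.
        self.mean += (x - self.mean) / self.n
        self.mean_diff += (x - last_mean) * (x - self.mean)
        self.var = (self.mean_diff / self.n).clip(min=1e-2)

    def normalize(self, inputs):
        # Assumed companion method: standardize with the running statistics.
        obs_std = onp.sqrt(self.var)
        return (inputs - self.mean) / obs_std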
class Runner:
    MEASURE_TIME_TIME_STEPS = 1000

    def __init__(self,
                 environment_name: str,
                 algorithm: str = 'acer',
                 algorithm_parameters: Optional[dict] = None,
                 num_parallel_envs: int = 5,
                 evaluate_time_steps_interval: int = 1500,
                 num_evaluation_runs: int = 5,
                 log_dir: str = 'logs/',
                 max_time_steps: int = -1,
                 record_end: bool = True,
                 experiment_name: Optional[str] = None,
                 asynchronous: bool = True,
                 log_tensorboard: bool = True,
                 do_checkpoint: bool = True,
                 record_time_steps: Optional[int] = None):
        """Trains and evaluates the agent.

        Args:
            environment_name: Name of the Gym environment to be used.
            algorithm: Algorithm name, one of ['acer', 'acerac'].
            algorithm_parameters: Dictionary with the parameters of the
                algorithm.
            num_parallel_envs: Number of parallel environments to be used.
            evaluate_time_steps_interval: Number of time steps between
                evaluation runs, -1 if no evaluation should be conducted.
            num_evaluation_runs: Number of episodes per evaluation.
            log_dir: Logging directory.
            max_time_steps: Maximum number of training time steps.
            record_end: True if a video should be recorded after training.
            experiment_name: A string included in the name of the log
                directory.
            asynchronous: True to use concurrent environments.
            log_tensorboard: True to create TensorBoard logs.
            do_checkpoint: True to save checkpoints during training.
            record_time_steps: Number of time steps between video recordings,
                or None to disable periodic recording.
        """
        self._elapsed_time_measure = 0
        self._time_step = 0
        self._done_episodes = 0
        self._next_evaluation_timestamp = 0
        self._next_record_timestamp = 0
        self._n_envs = num_parallel_envs
        self._evaluate_time_steps_interval = evaluate_time_steps_interval
        self._num_evaluation_runs = num_evaluation_runs
        self._max_time_steps = max_time_steps
        self._log_tensorboard = log_tensorboard
        self._do_checkpoint = do_checkpoint
        self._env_name = environment_name

        if experiment_name:
            self._log_dir = Path(
                f"{log_dir}/{environment_name}_{algorithm}_{experiment_name}"
                f"_{datetime.datetime.now().strftime('%Y%m%d-%H%M%S')}")
        else:
            self._log_dir = Path(
                f"{log_dir}/{environment_name}_{algorithm}"
                f"_{datetime.datetime.now().strftime('%Y%m%d-%H%M%S')}")
        self._log_dir.mkdir(parents=True, exist_ok=True)

        self._record_end = record_end
        self._record_time_steps = record_time_steps
        self._env = _get_env(environment_name, num_parallel_envs, asynchronous)
        self._evaluate_env = _get_env(environment_name, num_evaluation_runs,
                                      asynchronous)
        self._done_steps_in_a_episode = [0] * self._n_envs
        self._returns = [0] * self._n_envs
        self._rewards = [[] for _ in range(self._n_envs)]

        dummy_env = self._env.env_fns[0]()
        self._max_steps_in_episode = dummy_env.spec.max_episode_steps

        if self._log_tensorboard:
            tensor_board_writer = tf.summary.create_file_writer(
                str(self._log_dir))
            tensor_board_writer.set_as_default()

        self._csv_logger = CSVLogger(
            self._log_dir / 'results.csv',
            keys=['time_step', 'eval_return_mean', 'eval_std_mean'])

        self._save_parameters(algorithm_parameters)
        self._agent = _get_agent(algorithm, algorithm_parameters,
                                 dummy_env.observation_space,
                                 dummy_env.action_space)
        self._current_obs = self._env.reset()

    def run(self):
        """Performs training.

        If evaluation is enabled, the policy being optimized is evaluated
        periodically, without exploration.
""" while self._max_time_steps == -1 or self._time_step <= self._max_time_steps: if self._is_time_to_evaluate(): self._evaluate() if self._time_step != 0: self._save_results() if self._do_checkpoint: self._save_checkpoint() if self._is_time_to_record(): self._record_video() start_time = time.time() experience = self._step() self._agent.save_experience(experience) self._agent.learn() self._elapsed_time_measure += time.time() - start_time self._csv_logger.close() if self._record_end: self._record_video() def _save_results(self): self._csv_logger.dump() logging.info(f"saved evaluation results in {self._log_dir}") def _step( self ) -> List[Tuple[Union[int, float], np.array, float, float, bool, bool]]: actions, policies = self._agent.predict_action(self._current_obs) steps = self._env.step(actions) rewards = [] experience = [] old_obs = self._current_obs self._current_obs = steps[0] for i in range(self._n_envs): # 'is_done' from Gym does not take into account maximum number of steps in a single episode constraint self._time_step += 1 if self._time_step % Runner.MEASURE_TIME_TIME_STEPS == 0: self._measure_time() rewards.append(steps[1][i]) self._done_steps_in_a_episode[i] += 1 is_done_gym = steps[2][i] is_maximum_number_of_steps_reached = self._max_steps_in_episode is not None \ and self._max_steps_in_episode == self._done_steps_in_a_episode[i] is_done = is_done_gym and not is_maximum_number_of_steps_reached is_end = is_done or is_maximum_number_of_steps_reached reward = steps[1][i] experience.append((actions[i], old_obs[i], self._current_obs[i], reward, policies[i], is_done, is_end)) self._returns[i] += steps[1][i] self._rewards[i].append(steps[1][i]) if is_end: self._done_episodes += 1 logging.info(f"finished episode {self._done_episodes}, " f"return: {self._returns[i]}, " f"total time steps done: {self._time_step}") with tf.name_scope('rewards'): tf.summary.histogram('rewards', self._rewards[i], self._done_episodes) tf.summary.scalar('return', self._returns[i], self._done_episodes) tf.summary.scalar('episode length', self._done_steps_in_a_episode[i], self._done_episodes) self._returns[i] = 0 self._rewards[i] = [] self._done_steps_in_a_episode[i] = 0 self._current_obs = np.array(self._current_obs) return experience def _evaluate(self): self._next_evaluation_timestamp += self._evaluate_time_steps_interval returns = [0] * self._num_evaluation_runs envs_finished = [False] * self._num_evaluation_runs time_step = 0 current_obs = self._evaluate_env.reset() while not all(envs_finished): time_step += 1 actions, _ = self._agent.predict_action(current_obs, is_deterministic=True) steps = self._evaluate_env.step(actions) current_obs = steps[0] for i in range(self._num_evaluation_runs): if not envs_finished[i]: returns[i] += steps[1][i] is_done_gym = steps[2][i] is_maximum_number_of_steps_reached = self._max_steps_in_episode is not None\ and self._max_steps_in_episode == time_step is_end = is_done_gym or is_maximum_number_of_steps_reached envs_finished[i] = is_end if is_end: logging.info(f"evaluation run, " f"return: {returns[i]}") mean_returns = np.mean(returns) std_returns = np.std(returns) with tf.name_scope('rewards'): tf.summary.scalar('evaluation_return_mean', mean_returns, self._time_step) tf.summary.scalar('evaluation_return_std', std_returns, self._time_step) self._csv_logger.log_values({ 'time_step': self._time_step, 'eval_return_mean': mean_returns, 'eval_std_mean': std_returns }) def _record_video(self): if self._record_time_steps: self._next_record_timestamp += self._record_time_steps 
logging.info(f"saving video...") try: env = wrappers.Monitor(gym.make(self._env_name), self._log_dir / f'video-{self._time_step}', force=True, video_callable=lambda x: True) is_end = False time_step = 0 current_obs = np.array([env.reset()]) while not is_end: time_step += 1 actions, _ = self._agent.predict_action(current_obs, is_deterministic=True) steps = env.step(actions[0]) current_obs = np.array([steps[0]]) is_done_gym = steps[2] is_maximum_number_of_steps_reached = self._max_steps_in_episode is not None\ and self._max_steps_in_episode == time_step is_end = is_done_gym or is_maximum_number_of_steps_reached env.close() logging.info( f"saved video in {str(self._log_dir / f'video-{self._time_step}')}" ) except Exception as e: logging.error( f"Error while recording the video. Make sure you've got proper drivers" f"and libraries installed (i.e ffmpeg). Error message:\n {e}") def _is_time_to_evaluate(self): return self._evaluate_time_steps_interval != -1 and self._time_step >= self._next_evaluation_timestamp def _is_time_to_record(self): return self._record_time_steps is not None and self._time_step >= self._next_record_timestamp def _measure_time(self): with tf.name_scope('acer'): tf.summary.scalar( 'time steps per second', Runner.MEASURE_TIME_TIME_STEPS / self._elapsed_time_measure, self._time_step) self._elapsed_time_measure = 0 def _save_parameters(self, algorithm_parameters: dict): with open(str(self._log_dir / 'parameters.json'), 'wt') as f: json.dump(algorithm_parameters, f) def _save_checkpoint(self): checkpoint_dir = self._log_dir / 'checkpoint' checkpoint_dir.mkdir(exist_ok=True) runner_dump = { 'time_step': self._time_step, 'done_episodes': self._done_episodes, } with open(str(checkpoint_dir / 'runner.json'), 'wt') as f: json.dump(runner_dump, f) self._agent.save(checkpoint_dir / 'model') logging.info(f"saved checkpoint in '{str(checkpoint_dir)}'")
def main():
    torch.cuda.empty_cache()
    default_device = torch.device(
        'cuda:0' if torch.cuda.is_available() else 'cpu')
    exp_path = Path.cwd()

    # Load the training config
    config_file = "training_config.json"
    with open(config_file, "r") as f:
        x = json.load(f)

    hyperparams = x["HYPERPARAMETERS"]
    name = x["model"]
    exp_loc = Path(x["LOGGING"]["log_location"])
    n_classes = hyperparams['n_classes']
    _, path = utils.experiment_path_build(x)
    log_file = Path(x["LOGGING"]["log_file"])

    # Parameters
    validation_split = hyperparams["validation_split"]
    n_bands = x["BASIC_PARAMETERS"]["n_bands"]
    test_split = hyperparams["test_split"]
    random_seed = x["BASIC_PARAMETERS"]["random_seed"]
    shuffle_dataset = x["LOCATIONS"]["shuffle_dataset"]
    disp_batch = hyperparams["disp_batch"]  # images to display on test
    epochs = hyperparams["epoch"]
    bs = hyperparams["batch_size"]
    n_jobs = x["BASIC_PARAMETERS"]["n_jobs"]
    lr = hyperparams["learning_rate"]

    csv_file = str(path) + '/Whalemanifest.csv'  # used by the old dataset below
    # Old file-based dataset:
    # ds = Whale_Audio_Dataset(path, csv_file)

    # TODO: these params need moving -- the dataset path is hard-coded for
    # now; implement a config option so the user can choose the data.
    # This builds the databunch.
    data = get_data(path='E:\\Masters\\Datasets\\Master Whale Sounds'
                         '\\Master Whale Sounds\\Whale Unzipped - Good',
                    training_type="audio")
    print(data.train_ds)

    # train_ds_sampler, valid_ds_sampler, test_ds_sampler = train_test_valid_split(
    #     ds, shuffle_dataset, validation_split, test_split, random_seed)
    # phases = make_phases(ds, train_ds_sampler, valid_ds_sampler,
    #                      test_ds_sampler, bs=bs, n_jobs=n_jobs)
    phases = make_phases_databunch(data, bs=bs, n_jobs=n_jobs)

    model = AE(in_dim=176400, h_dim=1200).to(default_device)
    # model = ResNet(block=ResidualBlock, layers=[2, 4, 8],
    #                in_channels=n_bands, num_classes=n_classes)
    opt = optim.Adam(model.parameters(), lr=lr)

    training_params = {'Dataset Path': path,
                       'Experiment Path': exp_path,
                       'number of classes': n_classes,
                       'number of bands': n_bands,
                       'test_train Split': {
                           'validation split': validation_split,
                           'shuffle dataset': shuffle_dataset,
                           'random seed': random_seed},
                       'Batch Size': bs,
                       'Number of Jobs': n_jobs,
                       'Learning Rate': lr,
                       'Scheduler': 'One Cycle Schedule'}

    cb = CallbacksGroup([
        RollingLoss(),
        Accuracy(),
        Scheduler(
            OneCycleSchedule(t=len(phases[0].loader) * epochs),
            mode='batch'
        ),
        StreamLogger(),
        CSVLogger(exp_loc.joinpath(log_file), training_params),
        ProgressBar(),
        save_model(exp_loc, name)
    ])

    train(model, opt, phases, cb, epochs=epochs, device=default_device,
          loss_fn=F.cross_entropy)

    lr_history = pd.DataFrame(cb['scheduler'].parameter_history('lr'))
    ax = lr_history.plot(figsize=(8, 6))
    ax.set_xlabel('Training Batch Index')
    ax.set_ylabel('Learning Rate')
    fig = ax.get_figure()
    conf_path = exp_loc / 'checkpoints' / 'lr-test.jpg'
    fig.savefig(conf_path)
elif setting['sched'] in ['linear-pl', 'inverse-time-decay-pl']:
    for key in general_utils.recursive_keys(temp_params):
        base_str = '{}/{}'.format(key, setting['param'])
        param_fieldnames += [
            '{}_0'.format(base_str), '{}_1'.format(base_str)
        ]

print('Param fieldnames: {}'.format(param_fieldnames))

cons_param_fieldnames = ['cons/{}'.format(name) for name in param_fieldnames]
param_grad_fieldnames = ['grad/{}'.format(name) for name in param_fieldnames]

iteration_logger = CSVLogger(
    fieldnames=['perf/{}'.format(name) for name in [
        'time_elapsed', 'outer_iteration', 'total_inner_iterations',
        'train_sum_loss', 'train_acc', 'train_mean_loss', 'val_sum_loss',
        'val_acc', 'val_mean_loss', 'unroll_obj'
    ]] + cons_param_fieldnames + param_fieldnames,
    filename=os.path.join(save_dir, 'iteration.csv'))

frequent_logger = CSVLogger(
    fieldnames=[
        'frequent/time_elapsed', 'frequent/outer_iteration',
        'frequent/total_inner_iterations', 'frequent/F',
    ] + cons_param_fieldnames + param_fieldnames + param_grad_fieldnames,
    filename=os.path.join(save_dir, 'frequent.csv'))

# =======================================================================


def to_constrained(theta_unconstrained):
def classifier_and_logger(classifier):
    classifier.metrics = [accuracy, error_rate]
    cb = CSVLogger(classifier)
    return classifier, cb
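# Sketch of how such a fixture pair might be used in a test (assumes a
# fastai v1-style Learner; the test name and assertions are illustrative,
# not from the source):
def test_logger_creates_file_after_fit(classifier_and_logger):
    learn, cb = classifier_and_logger
    learn.fit(1, callbacks=[cb])
    assert cb.path.exists()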
    args.data, args.estimate, args.init_theta, args.lr, args.outer_lr,
    args.K, args.N, args.sigma, args.seed)

save_dir = os.path.join(args.save_dir, exp_name)

# Create experiment save directory
if not os.path.exists(save_dir):
    os.makedirs(save_dir)

# Save command-line arguments
with open(os.path.join(save_dir, 'args.yaml'), 'w') as f:
    yaml.dump(vars(args), f)

iteration_logger = CSVLogger(
    fieldnames=[
        'outer_iteration', 'total_inner_iterations', 'val_loss', 'theta',
        'theta_grad'
    ],
    filename=os.path.join(save_dir, 'iteration.csv'))

# Based on https://github.com/stanfordmlgroup/ngboost/blob/master/examples/experiments/regression_exp.py
dataset_name_to_loader = {
    'housing': lambda: pd.read_csv(
        'https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data',
        header=None,
        delim_whitespace=True,
    ),
    'concrete': lambda: pd.read_excel(
        'https://archive.ics.uci.edu/ml/machine-learning-databases/concrete/compressive/Concrete_Data.xls'
    ),
                    help='Save directory')
args = parser.parse_args()

exp_name = '{}-lr:{}-K:{}-sigma:{}-N:{}'.format(
    args.estimate, args.lr, args.K, args.sigma, args.N)
save_dir = os.path.join(args.save_dir, exp_name)
if not os.path.exists(save_dir):
    os.makedirs(save_dir)

# Save command-line arguments
with open(os.path.join(save_dir, 'args.yaml'), 'w') as f:
    yaml.dump(vars(args), f)

iteration_logger = CSVLogger(
    fieldnames=['iteration', 'loss', 'long_unroll_loss', 'theta', 'gradient'],
    filename=os.path.join(save_dir, 'iteration.csv'))

# Influence balancing problem setup
# -----------------------------------------------
n = 23
values = jnp.array([0.5] * n)
A = jnp.diag(values) + jnp.diag(values, 1)[:n, :n]
theta = jnp.array([0.5])
num_positive = 10
# -----------------------------------------------


@partial(jax.jit, static_argnums=2)
def unroll(theta, state, K):
    sign_vector = jnp.array([1] * num_positive + [-1] * (n - num_positive))
    theta_vec = jnp.repeat(theta, n) * sign_vector
    state_current = jnp.array(state)
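    # Assumed completion (the excerpt ends at state_current): iterate the
    # linear dynamics x_{t+1} = A x_t + theta_vec for K steps and score the
    # final state. The quadratic loss on the first coordinate is an
    # illustration, not the source's loss.
    for _ in range(K):
        state_current = A @ state_current + theta_vec
    loss = 0.5 * (state_current[0] - 1.0)**2
    return loss, state_current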
random.seed(args.seed)
onp.random.seed(args.seed)

# Note: the format string originally had nine placeholders for ten
# arguments, silently dropping divide_by_variance; an 'n:{}' slot is added
# here to match the argument list (cf. the otherwise identical snippet above).
exp_name = '{}-{}-lr:{}-sigma:{}-N:{}-T:{}-K:{}-n:{}-c:{}-d:{}'.format(
    args.estimate, args.env_name, args.lr, args.noise, args.N, args.horizon,
    args.K, int(args.normalize_state), int(args.clip_rewards),
    int(args.divide_by_variance))

save_dir = os.path.join(args.save_dir, exp_name, 'seed_{}'.format(args.seed))
if not os.path.exists(save_dir):
    os.makedirs(save_dir)

iteration_logger = CSVLogger(
    fieldnames=['time', 'iteration', 'total_steps', 'reward_mean',
                'reward_std', 'reward_max', 'reward_min', 'theta_grad_norm'],
    filename=os.path.join(save_dir, 'iteration.csv'))

total_count = 0


def get_action(state, params):
    return onp.dot(params, state)


def unroll(params, state, env, t, K, T, training=True, shift=0.0):
    global total_count
    reset = False
    total_reward = 0
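    # Assumed continuation (the excerpt ends at total_reward = 0): roll the
    # linear policy forward for up to K steps, resetting when the episode
    # terminates or the horizon T is reached. The 'shift' handling and the
    # return signature are illustrative ARS-style choices, not the source's.
    for _ in range(K):
        total_count += 1
        action = get_action(state, params)
        state, reward, done, _ = env.step(action)
        total_reward += reward - shift
        t += 1
        if done or t >= T:
            state = env.reset()
            t = 0
            reset = True
            break
    return total_reward, state, t, reset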