def run(conf):
    cf = xt.Config(conf)
    xt.info("config", cf)

    # -----------------------------------------------------
    # prepare result directory and TensorBoard
    if cf.save.do:
        save_dir = xt.make_dirs_current_time(
            xt.join(cf.save.directory, cf.env.name))
        writer = tf.summary.create_file_writer(save_dir)
        writer.set_as_default()

    # -----------------------------------------------------
    # preparation
    env = gym.make(cf.env.name)
    test_env = gym.make(cf.env.name)
    xt.info("env", env)

    agent = generate_ddpg(cf, env)
    xt.info("ddpg", agent)

    rb = buffer.ReplayBuffer()
    xt.info("ReplayBuffer", rb)

    # -----------------------------------------------------
    # main loop
    main(cf, agent, rb, env, test_env)

    # -----------------------------------------------------
    # dispose
    env.close()
    test_env.close()
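# Hedged usage sketch (not in the original source): how run() above might be
# invoked. "config/ddpg.yaml" is a hypothetical config path; the real entry
# point and file layout are not shown in this excerpt.
if __name__ == '__main__':
    run("config/ddpg.yaml")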
def prepare_saver(cf, agent):
    cf.save.path = xt.make_dirs_current_time(cf.save.path)
    writer = tf.summary.create_file_writer(cf.save.path)
    writer.set_as_default()
    checkpoint = tf.train.Checkpoint(model=agent.model())
    manager = tf.train.CheckpointManager(
        checkpoint, xt.join(cf.save.path, "model"),
        max_to_keep=cf.save.model.num)
    # hand the manager back so the caller can save checkpoints
    return manager
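# Hedged usage sketch (assumption, not from the original source): with
# prepare_saver() returning the CheckpointManager, a training loop could
# checkpoint periodically. cf.save.model.interval mirrors the key used by
# the other training scripts in this excerpt.
def maybe_save(cf, manager, step):
    # save a checkpoint every cf.save.model.interval steps
    if cf.save.do and (int(step) % cf.save.model.interval) == 0:
        manager.save()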
def run(config):
    cf = config if isinstance(config, xt.Config) else xt.Config(config)
    xt.info("config loaded", cf)

    # command
    if cf.command.d2r:
        cf.command.max = xt.d2r(cf.command.max)
    cmd = Command(cf.sampling.dt, cf.command.max, cf.command.interval)
    cmd.reset()
    xt.info("command generator", cmd)

    # aircraft model
    env = LVAircraft(cf.dt, sampling_interval=cf.sampling.dt)
    xt.info("env", env)

    # logging buffer
    capacity = int(cf.due / cf.sampling.dt + 1)
    buf = np.zeros((capacity, cf.sampling.size))
    xt.info("buffer size", buf.shape)

    # save dir
    cf.save.path = xt.join(cf.save.path, cf.fail.mode)
    cf.save.path = xt.make_dirs_current_time(cf.save.path, exist_ok=True)

    # result file list
    result_files = []

    # set fail mode
    cf.fail.mode = MODE[cf.fail.mode]
    cf.fail.value = cf.fail.value if not cf.fail.d2r else xt.d2r(cf.fail.value)
    env.elevator.set_fail_mode(cf.fail.mode, cf.fail.value)

    for round_idx in range(cf.save.num):  # avoid shadowing the round() builtin
        print("-" * 60)
        xt.info("round", round_idx)
        roll_over(cf, cmd, env, buf)

        labels = ["time", "dec", "de", "u", "w", "q", "theta", "mode"]
        result = pd.DataFrame(
            {key: buf[:, i].flatten() for i, key in enumerate(labels)})

        ax = result.plot(x="time", y=["dec", "de"])
        ax.legend(["command", "elevator"])
        # plt.show()
        plt.savefig(xt.join(cf.save.path, "{:03}.png".format(round_idx)))
        plt.close()

        fname = "{:03}.csv".format(round_idx)
        fname = xt.join(cf.save.path, fname)
        result_files.append(fname)
        result.to_csv(fname, index=False)
        del result

        buf = reset(env, cmd, buf)

    result_files = np.array(result_files)
    np.savetxt(xt.join(cf.save.path, "all.txt"), result_files, fmt="%s")
    print(result_files)
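# Hedged sketch of the reset() helper called at the end of each round above;
# the real implementation is not part of this excerpt. env.reset() and the
# zeroed buffer are assumptions; cmd.reset() mirrors the call made in run().
import numpy as np

def reset_sketch(env, cmd, buf):
    env.reset()                  # re-initialise the aircraft model
    cmd.reset()                  # restart the command generator
    return np.zeros_like(buf)    # hand back a cleared logging buffer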
def main(conf):
    cf = conf if isinstance(conf, xt.Config) else xt.Config(conf)

    # model
    model = FaultDetector(
        cf.model.units, cf.model.size.input, cf.model.size.output)
    model.summary()
    optimizer = tf.keras.optimizers.Adam(learning_rate=cf.train.lr)

    # data loader
    train_loader = build_loader(cf.data.train, cf.train.batch.size)
    test_loader = build_loader(cf.data.test, cf.train.batch.size)

    # logger
    if cf.save.do:
        layers = "L{}_{}".format(*cf.model.units)
        cf.save.directory = xt.join(cf.save.directory, layers)
        cf.save.directory = xt.make_dirs_current_time(cf.save.directory)
        writer = tf.summary.create_file_writer(cf.save.directory)
        writer.set_as_default()
        cf.dump(xt.join(cf.save.directory, "config.yaml"))

        checkpoint = tf.train.Checkpoint(model=model)
        manager = tf.train.CheckpointManager(
            checkpoint,
            xt.join(cf.save.directory, "model"),
            max_to_keep=cf.save.model.num)

    logger = xs.ReplayBuffer({
        "epoch": 1,
        "loss": 1,
        "accuracy": 1
    }, capacity=cf.train.epoch.max)

    epoch = tf.Variable(0, dtype=tf.int64)
    step = tf.Variable(0, dtype=tf.int64)

    while True:
        epoch.assign_add(1)  # mutate the tf.Variable instead of rebinding it

        for batch in train_loader:
            step.assign_add(1)
            if step == 1:
                tf.summary.trace_on()

            loss = train(batch, model, optimizer)
            tf.summary.scalar("train/loss", tf.constant(loss), step=step)

            if step == 1:
                tf.summary.trace_export(
                    "FaultDetector", step=epoch, profiler_outdir=None)

        if cf.save.do and (epoch % cf.save.model.interval) == 0:
            manager.save()
            writer.flush()

        loss, acc = test(test_loader, model)
        tf.summary.scalar("test/loss", tf.constant(loss), step=epoch)
        tf.summary.scalar("test/accuracy", tf.constant(acc), step=epoch)
        logger.add(epoch=epoch.numpy(), loss=loss, accuracy=acc)
        print("epoch: {: 4.0f}\t loss: {:10.6f}\t accuracy: {:10.6f}".format(
            epoch.numpy(), loss, acc))

        if epoch == cf.train.epoch.max:
            break

    # post-training processing
    ret = xs.Retriever(logger.buffer())
    result = pd.DataFrame({
        "epoch": ret("epoch"),
        "loss": ret("loss"),
        "accuracy": ret("accuracy"),
    })
    result_name = xt.join(cf.save.directory, "result.csv")
    result.to_csv(result_name, index=False)
    print(result)
    xt.info("result saved", result_name)
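# Hedged sketch of the per-batch train() step called in the loop above; the
# actual implementation is not shown. The (x, y) batch layout and the
# sparse-categorical-cross-entropy objective are assumptions for a
# classification-style fault detector.
import tensorflow as tf

def train_sketch(batch, model, optimizer):
    x, y = batch
    with tf.GradientTape() as tape:
        logits = model(x, training=True)
        loss = tf.reduce_mean(tf.keras.losses.sparse_categorical_crossentropy(
            y, logits, from_logits=True))
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    return float(loss)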
def train(conf):
    cf = xt.Config(conf)

    env = gym.make(cf.env.name)
    test_env = gym.make(cf.env.name)
    dim_obs = space.get_size(env.observation_space)
    dim_act = space.get_size(env.action_space)

    agent = DQN(cf.dqn.units, dim_act, dim_obs,
                gamma=cf.dqn.gamma, epsilon=cf.dqn.epsilon)

    # saver
    if cf.save.do:
        cf.save.path = xt.make_dirs_current_time(cf.save.path)
        writer = tf.summary.create_file_writer(cf.save.path)
        writer.set_as_default()
        checkpoint = tf.train.Checkpoint(model=agent.model())
        manager = tf.train.CheckpointManager(
            checkpoint, xt.join(cf.save.path, "model"),
            max_to_keep=cf.save.model.num)

    rb = buffer.ReplayBuffer()
    step = tf.Variable(0, dtype=tf.int64)

    while True:
        obs = env.reset()
        episode_step = 0
        episode_reward = 0

        while True:
            act = agent.select_action(obs)
            next_obs, reward, done, _ = env.step(act)
            episode_reward += reward
            episode_step += 1

            rb.store(
                state=obs, action=act, next_state=next_obs,
                reward=reward, done=done).flush()

            # train
            if step >= cf.train.step.warmup:
                batch = rb.sample(cf.train.batch)
                loss = agent.train(batch)
                tf.summary.scalar("train/loss", tf.constant(loss), step=step)

            # test
            if (step.numpy() % cf.test.interval) == 0:
                test_step, test_reward = test(cf, agent, test_env)
                tf.summary.scalar(
                    "test/episode_step", tf.constant(test_step), step=step)
                tf.summary.scalar(
                    "test/reward", tf.constant(test_reward), step=step)
                print("test: step", test_step, "reward", test_reward)

            step.assign_add(1)  # mutate the tf.Variable instead of rebinding it
            obs = next_obs

            if step > cf.train.step.max:
                # finished: close both environments before returning
                env.close()
                test_env.close()
                return
            if done:
                break

        tf.summary.scalar(
            "train/episode_step", tf.constant(episode_step), step=step)
        tf.summary.scalar(
            "train/episode_reward", tf.constant(episode_reward), step=step)
        print("step:", step.numpy(),
              "episode[ step:", episode_step, "reward:", episode_reward, "]")
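# Hedged sketch of the test() helper used above (not in this excerpt): run
# one evaluation episode and return its length and total reward. Whether
# agent.select_action() has a dedicated greedy/eval mode is unknown here.
def test_sketch(cf, agent, test_env):
    obs = test_env.reset()
    episode_step, episode_reward = 0, 0.0
    done = False
    while not done:
        act = agent.select_action(obs)
        obs, reward, done, _ = test_env.step(act)
        episode_step += 1
        episode_reward += reward
    return episode_step, episode_reward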
        # TD target from the target networks; gradients flow only through
        # the current critic because the target term is stop-gradient-ed
        target_q = self.discount * (1 - done) * self._critic_target(
            [next_state, self._policy_target(next_state)])
        target_q = reward + target_q
        current_q = self._critic([state, action])
        return tf.stop_gradient(target_q) - current_q


if __name__ == '__main__':
    import numpy as np
    import tensorflow as tf
    import xtools as xt

    xt.go_to_root()

    critic = Critic([8, 8], 3, 2)
    critic.summary()
    opt = tf.optimizers.Adam(learning_rate=1e-3)

    save_dir = xt.make_dirs_current_time("tests/algorithms/ddpg/result/test")
    writer = tf.summary.create_file_writer(save_dir)
    writer.set_as_default()

    for step in range(10000):
        act = np.random.rand(1, 2).astype(np.float32)
        obs = np.random.rand(1, 3).astype(np.float32)
        # synthetic regression target: squared norm of observation and action
        rwd = np.sum(np.square(obs)) + np.sum(np.square(act))

        with tf.GradientTape() as tape:
            val = critic([obs, act])
            error = val - rwd
            loss = tf.square(error) / 2
            loss = tf.reduce_mean(loss)

        grad = tape.gradient(loss, critic.trainable_variables)
        opt.apply_gradients(zip(grad, critic.trainable_variables))
        tf.summary.scalar("test/loss", loss, step=step)
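# Hedged sketch (assumption, not from the original source) of the soft
# target-network update that usually accompanies the TD error above in DDPG;
# the codebase's actual update rule and its tau value are not shown here.
def soft_update_sketch(target_vars, source_vars, tau=0.005):
    # blend each target variable toward its online counterpart
    for t, s in zip(target_vars, source_vars):
        t.assign((1.0 - tau) * t + tau * s)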