Example #1
def run(conf):
    cf = xt.Config(conf)
    xt.info("config", cf)

    # -----------------------------------------------------
    # prepare result directory and TensorBoard
    if cf.save.do:
        save_dir = xt.make_dirs_current_time(
            xt.join(cf.save.directory, cf.env.name))
        writer = tf.summary.create_file_writer(save_dir)
        writer.set_as_default()

    # -----------------------------------------------------
    # preparation
    env = gym.make(cf.env.name)
    test_env = gym.make(cf.env.name)
    xt.info("env", env)

    agent = generate_ddpg(cf, env)
    xt.info("ddpg", agent)

    rb = buffer.ReplayBuffer()
    xt.info("ReplayBuffer", rb)

    # -----------------------------------------------------
    # main loop
    main(cf, agent, rb, env, test_env)

    # -----------------------------------------------------
    # dispose
    env.close()
    test_env.close()
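run hands conf straight to xt.Config, so it can be driven from a small launcher. A minimal invocation sketch, assuming a hypothetical YAML config file (the actual file name is not part of the example):

if __name__ == "__main__":
    # hypothetical path; anything xt.Config accepts works here
    run("config/ddpg.yaml")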
Example #2
def prepare_saver(cf, agent):
    cf.save.path = xt.make_dirs_current_time(cf.save.path)
    writer = tf.summary.create_file_writer(cf.save.path)
    writer.set_as_default()
    checkpoint = tf.train.Checkpoint(model=agent.model())
    manager = tf.train.CheckpointManager(checkpoint,
                                         xt.join(cf.save.path, "model"),
                                         max_to_keep=cf.save.model.num)
    # hand the writer and manager back so the caller can flush summaries and save checkpoints
    return writer, manager
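With the writer and checkpoint manager returned, a caller can flush summaries and checkpoint the model. A usage sketch, assuming cf, agent, and a replay buffer rb like those in Example #5:

writer, manager = prepare_saver(cf, agent)
for step in range(cf.train.step.max):
    loss = agent.train(rb.sample(cf.train.batch))       # training step, as in Example #5
    tf.summary.scalar("train/loss", tf.constant(loss), step=step)
manager.save()     # checkpoint agent.model()
writer.flush()     # push buffered summaries to disk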
Example #3
def run(config):
    cf = config if isinstance(config, xt.Config) else xt.Config(config)
    xt.info("config loaded", cf)

    # command
    if cf.command.d2r:
        cf.command.max = xt.d2r(cf.command.max)
    cmd = Command(cf.sampling.dt, cf.command.max, cf.command.interval)
    cmd.reset()
    xt.info("command generator", cmd)

    # aircraft model
    env = LVAircraft(cf.dt, sampling_interval=cf.sampling.dt)
    xt.info("env", env)

    # logger
    capacity = int(cf.due / cf.sampling.dt + 1)
    buf = np.zeros((capacity, cf.sampling.size))
    xt.info("buffer size", buf.shape)
    # save dir
    cf.save.path = xt.join(cf.save.path, cf.fail.mode)
    cf.save.path = xt.make_dirs_current_time(cf.save.path, exist_ok=True)
    # result file list
    result_files = []

    # set fail
    cf.fail.mode = MODE[cf.fail.mode]
    cf.fail.value = cf.fail.value if not cf.fail.d2r else xt.d2r(cf.fail.value)
    env.elevator.set_fail_mode(cf.fail.mode, cf.fail.value)

    for round in range(cf.save.num):
        print("-" * 60)
        xt.info("round", round)
        roll_over(cf, cmd, env, buf)

        labels = ["time", "dec", "de", "u", "w", "q", "theta", "mode"]
        result = pd.DataFrame(
            {key: buf[:, i].flatten()
             for i, key in enumerate(labels)})

        ax = result.plot(x="time", y=["dec", "de"])
        ax.legend(["command", "elevator"])
        # plt.show()
        plt.savefig(xt.join(cf.save.path, "{:03}.png".format(round)))
        plt.close()  # close the figure created by result.plot

        fname = "{:03}.csv".format(round)
        fname = xt.join(cf.save.path, fname)
        result_files.append(fname)
        result.to_csv(fname, index=False)
        del result

        buf = reset(env, cmd, buf)

    result_files = np.array(result_files)
    np.savetxt(xt.join(cf.save.path, "all.txt"), result_files, fmt="%s")
    print(result_files)
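Each round ends up in its own CSV file and all.txt lists them; a later analysis pass could read the results back with a sketch like this (the concatenation scheme is an assumption, not part of the original):

files = np.loadtxt(xt.join(cf.save.path, "all.txt"), dtype=str, ndmin=1)
runs = [pd.read_csv(f) for f in files]            # one DataFrame per round
merged = pd.concat(runs, keys=range(len(runs)))   # hierarchical index: (round, row)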
Example #4
def main(conf):
    cf = conf if isinstance(conf, xt.Config) else xt.Config(conf)

    # model
    model = FaultDetector(cf.model.units, cf.model.size.input, cf.model.size.output)
    model.summary()
    optimizer = tf.keras.optimizers.Adam(learning_rate=cf.train.lr)

    # data loader
    train_loader = build_loader(cf.data.train, cf.train.batch.size)
    test_loader  = build_loader(cf.data.test, cf.train.batch.size)

    # logger
    if cf.save.do:
        layers = "L{}_{}".format(*cf.model.units)
        cf.save.directory = xt.join(cf.save.directory, layers)
        cf.save.directory = xt.make_dirs_current_time(cf.save.directory)
        writer = tf.summary.create_file_writer(cf.save.directory)
        writer.set_as_default()
        cf.dump(xt.join(cf.save.directory, "config.yaml"))
        checkpoint = tf.train.Checkpoint(model=model)
        manager = tf.train.CheckpointManager(
            checkpoint,
            xt.join(cf.save.directory, "model"),
            max_to_keep=cf.save.model.num
        )
    logger = xs.ReplayBuffer({
        "epoch": 1,
        "loss": 1,
        "accuracy": 1
    }, capacity=cf.train.epoch.max)

    epoch = tf.Variable(0, dtype=tf.int64)
    step = tf.Variable(0, dtype=tf.int64)

    while True:
        epoch = epoch + 1

        for batch in train_loader:
            step = step + 1
            if step == 1:
                tf.summary.trace_on()

            loss = train(batch, model, optimizer)
            tf.summary.scalar("train/loss", tf.constant(loss), step=step)

            if step == 1:
                tf.summary.trace_export(
                    "FaultDetector",
                    step=epoch,
                    profiler_outdir=None
                )

        if cf.save.do and (epoch % cf.save.model.interval) == 0:
            manager.save()
            writer.flush()

        loss, acc = test(test_loader, model)
        tf.summary.scalar("test/loss",     tf.Variable(loss), step=epoch)
        tf.summary.scalar("test/accuracy", tf.Variable(acc),  step=epoch)
        logger.add(epoch=epoch.numpy(), loss=loss, accuracy=acc)
        print("epoch: {: 4.0f}\t loss: {:10.6f}\t accuracy: {:10.6f}".format(
            epoch.numpy(), loss, acc
        ))

        if epoch == cf.train.epoch.max:
            break

    # post train processing
    ret = xs.Retriever(logger.buffer())
    result = pd.DataFrame({
        "epoch": ret("epoch"),
        "loss": ret("loss"),
        "accuracy": ret("accuracy"),
    })

    result_name = xt.join(cf.save.directory, "result.csv")
    result.to_csv(result_name, index=False)
    print(result)
    xt.info("result saved", result_name)
Example #5
def train(conf):
    cf = xt.Config(conf)

    env = gym.make(cf.env.name)
    test_env = gym.make(cf.env.name)
    dim_obs = space.get_size(env.observation_space)
    dim_act = space.get_size(env.action_space)

    agent = DQN(cf.dqn.units,
                dim_act,
                dim_obs,
                gamma=cf.dqn.gamma,
                epsilon=cf.dqn.epsilon)

    # saver
    if cf.save.do:
        cf.save.path = xt.make_dirs_current_time(cf.save.path)
        writer = tf.summary.create_file_writer(cf.save.path)
        writer.set_as_default()
        checkpoint = tf.train.Checkpoint(model=agent.model())
        manager = tf.train.CheckpointManager(checkpoint,
                                             xt.join(cf.save.path, "model"),
                                             max_to_keep=cf.save.model.num)

    rb = buffer.ReplayBuffer()
    step = tf.Variable(0, dtype=tf.int64)

    while True:
        obs = env.reset()
        episode_step = 0
        episode_reward = 0

        while True:
            act = agent.select_action(obs)
            next_obs, reward, done, _ = env.step(act)
            episode_reward += reward
            episode_step += 1

            rb.store(state=obs,
                     action=act,
                     next_state=next_obs,
                     reward=reward,
                     done=done).flush()

            # train
            if step >= cf.train.step.warmup:
                batch = rb.sample(cf.train.batch)
                loss = agent.train(batch)
                tf.summary.scalar("train/loss", tf.constant(loss), step=step)

            # test
            if (step.numpy() % cf.test.interval) == 0:
                test_step, test_reward = test(cf, agent, test_env)
                tf.summary.scalar("test/episode_step",
                                  tf.constant(test_step),
                                  step=step)
                tf.summary.scalar("test/reward",
                                  tf.constant(test_reward),
                                  step=step)
                print("test: step", test_step, "reward", test_reward)

            step = step + 1
            obs = next_obs

            if step > cf.train.step.max:
                if cf.save.do:
                    manager.save()   # keep the final model
                    writer.flush()
                env.close()
                test_env.close()
                return

            if done:
                break

        tf.summary.scalar("train/episode_step",
                          tf.constant(episode_step),
                          step=step)
        tf.summary.scalar("train/episode_reward",
                          tf.constant(episode_reward),
                          step=step)
        print("step:", step, "episode[ step:", episode_step, "reward:",
              episode_reward, "]")
Example #6
        # TD target: reward + discount * (1 - done) * Q'(s', mu'(s')) from the target
        # networks; stop_gradient keeps the update flowing only through the current critic
        target_q = self.discount * (1 - done) * self._critic_target(
            [next_state, self._policy_target(next_state)])
        target_q = reward + target_q
        current_q = self._critic([state, action])
        return tf.stop_gradient(target_q) - current_q


if __name__ == '__main__':
    import xtools as xt
    xt.go_to_root()
    critic = Critic([8, 8], 3, 2)
    critic.summary()

    opt = tf.optimizers.Adam(learning_rate=1e-3)

    save_dir = xt.make_dirs_current_time("tests/algorithms/ddpg/result/test")
    writer = tf.summary.create_file_writer(save_dir)
    writer.set_as_default()

    for step in range(10000):
        act = np.random.rand(1, 2).astype(np.float32)
        obs = np.random.rand(1, 3).astype(np.float32)
        rwd = np.sum(np.square(obs)) + np.sum(np.square(act))  # synthetic quadratic target value

        with tf.GradientTape() as tape:
            val = critic([obs, act])
            error = val - rwd
            loss = tf.square(error) / 2
            loss = tf.reduce_mean(loss)
        grad = tape.gradient(loss, critic.trainable_variables)