Example #1
def start_api():
    logger_flask = logging.getLogger('werkzeug')
    for hdlr in logger_flask.handlers[:]:  # remove all old handlers
        logger_flask.removeHandler(hdlr)

    config.setup_logger(logger_flask, config.get_value(AppConfig.API_CONFIGURATION, AppConfig.ATTR_LOG_LEVEL),
                        AppConfig.API_CONFIGURATION)
    config.setup_logger(rest_app.logger, config.get_value(AppConfig.API_CONFIGURATION, AppConfig.ATTR_LOG_LEVEL),
                        AppConfig.API_CONFIGURATION)
    host = config.get_value(AppConfig.API_CONFIGURATION, AppConfig.API_HOST)
    port = config.get_value(AppConfig.API_CONFIGURATION, AppConfig.API_PORT)
    debug = config.is_value_active(AppConfig.API_CONFIGURATION, AppConfig.API_DEBUG)
    ssl = 'adhoc' if config.is_value_active(AppConfig.API_CONFIGURATION, AppConfig.API_SSL) else None
    logger.info('REST API starting')

    # ssl_context='adhoc' assumes the Werkzeug-based development server is used
    socketio.run(rest_app, host=host, port=port, debug=debug, ssl_context=ssl)
Example #2
def main():
    args = extend_arguments(get_parser()).parse_args()
    configs = common.config.get_config(args.env, args.experiment_name)
    assert args.alg in ['a2c', 'ppo', 'acktr', 'sac']
    if args.recurrent_policy:
        assert args.alg in ['a2c', 'ppo'], \
            'Recurrent policy is only implemented for A2C and PPO'

    if args.test:
        args.num_processes = 1
        args.use_wandb = False

    logger = setup_logger(args.verbose, args.model_name, configs.log_directory)
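    # restrict PyTorch to a single CPU thread per process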
    torch.set_num_threads(1)

    # set seed values
    seed = args.seed
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    if args.use_wandb:
        import wandb
        resume_wandb = args.wandb_resume_id is not None
        wandb.init(config=args,
                   resume=resume_wandb,
                   id=args.wandb_resume_id,
                   project='rl',
                   name=args.experiment_name)

    # make environments (envs[0] is used for evaluation)
    envs, env_vector = make_vec_envs_pytorch(args.env,
                                             return_evn_vector=True,
                                             device=device,
                                             log_dir=configs.log_directory,
                                             **vars(args))
    eval_envs = wrap_env_pytorch(env_vector[0], args.gamma, device)

    actor_critic = Policy(envs.observation_space.shape,
                          envs.action_space,
                          base_kwargs={
                              'recurrent': args.recurrent_policy,
                              'hidden_size': args.hidden_layer_size
                          })
    # load model
    if args.load_path is not None:
        logger.info("loading model: {}".format(args.load_path))
        actor_critic = torch.load(args.load_path)

    actor_critic.to(device)

    if args.test:
        test(eval_envs, actor_critic, args, logger)
    else:
        train(envs, env_vector, eval_envs, actor_critic, args, configs, logger)
Example #3
def main():
    args = extend_arguments(get_parser()).parse_args()
    configs = common.config.get_config(args.env, args.experiment_name)

    if args.test:
        args.num_processes = 1
        args.use_wandb = False

    logger = setup_logger(args.verbose, args.experiment_name, configs.log_directory)
    torch.set_num_threads(1)

    # set seed values
    seed = args.seed
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    if args.use_wandb:
        import wandb
        resume_wandb = args.wandb_resume_id is not None
        wandb.init(config=args, resume=resume_wandb, id=args.wandb_resume_id, project='rl',
                   name=args.experiment_name)

    env = gym.make(args.env, **vars(args))

    # Agent
    global_episodes = 0
    agent = SAC(env.observation_space.shape[0], env.action_space, args)
    if args.load_path:
        global_episodes = agent.load_model(args.load_path, args.load_optim)
        if args.reset_global_episode:
            global_episodes = 0
        logger.info(f'Agent loaded: {args.load_path} @{global_episodes}')

    memory = None
    if args.memory_load_path:
        memory = pickle.load(open(args.memory_load_path, 'rb'))
        logger.info(f'Memory loaded: {args.memory_load_path}')
        logger.info(f'Loaded Memory Length: {len(memory)}')
        logger.warning('There is something wrong with loading experiments from memory and '
                       'the training becomes unstable. Be extra careful when using this feature!')

    if args.test:
        test(env, agent, args)
    else:
        train(env, agent, args, configs, memory, global_episodes)
Example #4
def test(env, agent, args):
    logger = setup_logger()
    env.seed(args.seed)
    avg_reward, infos = _test(env, agent, args.test_episode)
    logger.info('Test trial complete. Writing results...')

    results_path = args.load_path + '_test_results'

    if args.env.startswith('JawEnv'):
        from artisynth_envs.envs.jaw_env import write_infos, calculate_convex_hull, \
            maximum_occlusal_force
        write_infos(infos, results_path)

        # Derived metrics
        maximum_occlusal_force(env, results_path)
        calculate_convex_hull(results_path)

    logger.info(f'results written to: {results_path}')
    env.close()
Example #5
import numpy as np
import torch
import os

from common import constants as c
from common.utilities import Bunch
from common.config import setup_logger
from artisynth_envs.artisynth_base_env import ArtiSynthBase

logger = setup_logger()


class JawEnv(ArtiSynthBase):
    def __init__(self, wait_action, reset_step, goal_threshold, goal_reward,
                 **kwargs):
        self.args = Bunch(kwargs)
        super().__init__(**kwargs)

        self.episode_counter = 0
        self.action_size = 0
        self.obs_size = 0
        self.goal_threshold = float(goal_threshold)

        self.reset_step = int(reset_step)
        self.wait_action = float(wait_action)

        self.goal_reward = goal_reward

        self.action_size, self.obs_size = self.init_spaces(
            incremental_actions=self.incremental_actions)
Example #6
def main():
    args = extend_arguments(get_parser()).parse_args()
    configs = common.config.get_config(args.env, args.experiment_name)
    setup_tensorflow()
    get_custom_objects().update(
        {'SmoothLogistic': Activation(smooth_logistic)})

    save_path = os.path.join(configs.trained_directory,
                             args.alg + "-" + args.env + ".h5f")

    log_file_name = args.model_name
    logger = setup_logger(args.verbose, log_file_name, configs.log_directory)

    import artisynth_envs.envs  # imported here to avoid the conflict with tensorflow's logger
    env = gym.make(args.env, **vars(args))
    env.seed(args.seed)

    training = False  # ensure 'training' is defined for the exception handler below
    try:
        nb_actions = env.action_space.shape[0]
        memory = SequentialMemory(limit=MEMORY_SIZE, window_length=1)

        mu_model = get_mu_model(env)
        v_model = get_v_model(env)
        l_model = get_l_model(env)

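        # Ornstein-Uhlenbeck process: temporally correlated exploration noise for continuous actions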
        random_process = OrnsteinUhlenbeckProcess(
            size=nb_actions,
            theta=THETA,
            mu=MU,
            sigma=SIGMA,
            dt=DT,
            sigma_min=SIGMA_MIN,
            n_steps_annealing=NUM_STEPS_ANNEALING)

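        # NAF agent: builds a quadratic advantage function from the mu, V and L networks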
        agent = MuscleNAFAgent(nb_actions=nb_actions,
                               V_model=v_model,
                               L_model=l_model,
                               mu_model=mu_model,
                               memory=memory,
                               nb_steps_warmup=WARMUP_STEPS,
                               random_process=random_process,
                               gamma=GAMMA,
                               target_model_update=UPDATE_TARGET_MODEL_STEPS)

        agent.compile(Adam(lr=args.lr), metrics=['mse'])
        env.agent = agent

        if args.load_path is not None:
            agent.load_weights(args.load_path)
            logger.info(f'Weights loaded from: {args.load_path}')

        callbacks = []
        if args.use_tensorboard:
            from rl.callbacks import RlTensorBoard
            tensorboard = RlTensorBoard(log_dir=os.path.join(
                configs.tensorboard_log_directory, log_file_name),
                                        histogram_freq=1,
                                        batch_size=BATCH_SIZE,
                                        write_graph=True,
                                        write_grads=True,
                                        write_images=False,
                                        embeddings_freq=0,
                                        embeddings_layer_names=None,
                                        embeddings_metadata=None,
                                        agent=agent)
            callbacks.append(tensorboard)
        if args.use_csvlogger:
            csv_logger = keras.callbacks.CSVLogger(os.path.join(
                configs.agent_log_directory, log_file_name),
                                                   append=False,
                                                   separator=',')
            callbacks.append(csv_logger)

        if not args.test:  # train code
            training = True
            agent.fit(env,
                      nb_steps=NUM_TRAINING_STEPS,
                      visualize=False,
                      verbose=args.verbose,
                      nb_max_episode_steps=args.reset_step,
                      callbacks=callbacks)
            logger.info('Training complete')
            agent.save_weights(save_path)
        else:  # test code
            logger.info("Testing")
            training = False
            env.log_to_file = False
            history = agent.test(env,
                                 nb_episodes=args.test_episode,
                                 nb_max_episode_steps=args.reset_step)
            logger.info(history.history)
            logger.info(f"Average last distance: {np.mean(history.history['last_distance'])}")
            logger.info(f"Mean Reward: {np.mean(history.history['episode_reward'])}")

    except Exception as e:
        if training:
            agent.save_weights(save_path)
        logger.error(f'Error in main code: {e}')
        raise
Example #7
def train(env, agent, args, configs, memory=None, global_episodes=0):
    logger = setup_logger()

    # TensorboardX
    if args.use_tensorboard:
        writer = SummaryWriter(logdir='{}/{}_SAC_{}_{}_{}'.format(
            configs.tensorboard_log_directory,
            datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S"), args.env,
            args.policy, "autotune" if args.automatic_entropy_tuning else ""))

    # Memory
    memory = memory or ReplayMemory(args.replay_size)

    # Training Loop
    global_steps = 0

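    # loop over episodes indefinitely; training ends once global_steps exceeds args.num_steps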
    for global_episodes in itertools.count(start=global_episodes, step=1):
        episode_reward = 0
        episode_steps = 0
        done = False

        state = env.reset()

        critic_1_loss_total = 0
        critic_2_loss_total = 0
        policy_loss_total = 0
        ent_loss_total = 0
        alpha_total = 0
        while not done:
            action = agent.select_action(state)
            if len(memory) > args.batch_size and global_steps > args.start_steps:
                # print('updating', len(memory), global_steps)
                for i in range(args.updates_per_step):  # number of updates per step in environment
                    # update all parameters
                    critic_1_loss, critic_2_loss, policy_loss, ent_loss, alpha = \
                        agent.update_parameters(memory, args.batch_size)

                    critic_1_loss_total += critic_1_loss
                    critic_2_loss_total += critic_2_loss
                    policy_loss_total += policy_loss
                    ent_loss_total += ent_loss
                    alpha_total += alpha

            next_state, reward, done, _ = env.step(action)  # Step
            episode_steps += 1
            global_steps += 1
            episode_reward += reward

            # Ignore the "done" signal if it comes from hitting the time horizon.
            # (https://github.com/openai/spinningup/blob/master/spinup/algos/sac/sac.py)
            mask = 1 if episode_steps == env.reset_step else float(not done)

            memory.push(state, action, reward, next_state, mask)  # append transition to memory
            state = next_state
        # end of episode

        # The following averages are slightly off for the first episodes, since no updates
        # happen while len(memory) < batch_size
        critic_1_loss_total /= (episode_steps * args.updates_per_step)
        critic_2_loss_total /= (episode_steps * args.updates_per_step)
        policy_loss_total /= (episode_steps * args.updates_per_step)
        ent_loss_total /= (episode_steps * args.updates_per_step)
        alpha_total /= (episode_steps * args.updates_per_step)
        episode_reward /= episode_steps

        if global_episodes % args.episode_log_interval == 0:
            print(
                "Episode: {}, total numsteps: {}, episode steps: {}, reward: {}"
                .format(global_episodes, global_steps, episode_steps,
                        round(episode_reward, 2)))
            if args.use_tensorboard:
                writer.add_scalar('reward/train', episode_reward,
                                  global_episodes)
                writer.add_scalar('loss/critic_1', critic_1_loss_total,
                                  global_episodes)
                writer.add_scalar('loss/critic_2', critic_2_loss_total,
                                  global_episodes)
                writer.add_scalar('loss/policy', policy_loss_total,
                                  global_episodes)
                writer.add_scalar('loss/entropy_loss', ent_loss_total,
                                  global_episodes)
                writer.add_scalar('entropy_temperature/alpha', alpha_total,
                                  global_episodes)
            if args.use_wandb:
                import wandb
                wandb.log(
                    {
                        'episode_reward': episode_reward,
                        'loss/critic_1': critic_1_loss_total,
                        'loss/critic_2': critic_2_loss_total,
                        'loss/policy': policy_loss_total,
                        'loss/entropy_loss': ent_loss_total,
                        'entropy_temperature/alpha': alpha_total,
                        'lr': get_lr_pytorch(agent.policy_optim)
                    },
                    step=global_episodes)

        if global_episodes % args.eval_interval == args.eval_interval - 1:
            avg_reward, infos = _test(env, agent, args.eval_episode)
            if args.use_tensorboard:
                writer.add_scalar('eval/avg_reward', avg_reward,
                                  global_episodes)
                for key, val in infos.items():
                    writer.add_scalar(f'eval/{key}', val, global_episodes)
            if args.use_wandb:
                import wandb
                wandb.log({'eval/avg_reward': avg_reward},
                          step=global_episodes)
                for key, val in infos.items():
                    wandb.log({f'eval/{key}': val}, step=global_episodes)

        if global_episodes % args.save_interval == args.save_interval - 1:
            test_save_path = os.path.join(configs.trained_directory,
                                          'test_file')

            # TODO: update the following hack by saving file temp and copy to destination
            with open(test_save_path, 'w') as test_file:
                test_file.write(
                    "This is just to make sure we have enough disk space to fully save "
                    "the file, so we do not corrupt the agent or the memory! " * 1000)

            agent_save_path = os.path.join(configs.trained_directory, 'agent')
            agent.global_episode = global_episodes + 1
            # torch.save(agent, agent_save_path)
            agent.save_model(agent_save_path, global_episodes)
            logger.info(f'model saved: {agent_save_path}')

            memory_path = os.path.join(configs.trained_directory, 'memory')
            pickle.dump(memory, open(memory_path, 'wb'))
            logger.info(f'memory saved: {memory_path}')
            print('------------------')

        if global_steps > args.num_steps:  # end of training
            break

    env.close()