Example #1
    def __init__(self, env_id, seed):
        self.env_id = env_id
        self.gamma = 0.99  # fixme
        self.epsilon = 0.1
        self.epsilon_decay = 0.5  # 0.92
        self.seed = seed
        self.history = []

        # Create a new base directory like /tmp/openai-2018-05-21-12-27-22-552435
        log_dir = os.path.join(energyplus_logbase_dir(), datetime.datetime.now().strftime("openai-%Y-%m-%d-%H-%M-%S-%f"))
        if not os.path.exists(log_dir + '/output'):
            os.makedirs(log_dir + '/output')
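        # ENERGYPLUS_LOG, ENERGYPLUS_MODEL and ENERGYPLUS_WEATHER are passed via the
        # process environment, presumably consumed by the EnergyPlus environment wrapper.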
        os.environ["ENERGYPLUS_LOG"] = log_dir
        model = os.getenv('ENERGYPLUS_MODEL')
        if model is None:
            print('Environment variable ENERGYPLUS_MODEL is not defined')
            sys.exit()
        weather = os.getenv('ENERGYPLUS_WEATHER')
        if weather is None:
            print('Environment variable ENERGYPLUS_WEATHER is not defined')
            sys.exit()

        print('train: init logger with dir={}'.format(log_dir))  # XXX
        logger.configure(log_dir)

        self.log_dir = log_dir
Example #2
def train(env_id, num_timesteps, seed):
    import baselines.common.tf_util as U
    sess = U.single_threaded_session()
    sess.__enter__()
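    # Derive a distinct seed per MPI rank so parallel workers explore differently.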
    workerseed = seed + 10000 * MPI.COMM_WORLD.Get_rank()

    # Create a new base directory like /tmp/openai-2018-05-21-12-27-22-552435
    log_dir = os.path.join(
        energyplus_logbase_dir(),
        datetime.datetime.now().strftime("openai-%Y-%m-%d-%H-%M-%S-%f"))
    if not os.path.exists(log_dir + '/output'):
        os.makedirs(log_dir + '/output')
    os.environ["ENERGYPLUS_LOG"] = log_dir
    model = os.getenv('ENERGYPLUS_MODEL')
    if model is None:
        print('Environment variable ENERGYPLUS_MODEL is not defined')
        sys.exit()
    weather = os.getenv('ENERGYPLUS_WEATHER')
    if weather is None:
        print('Environment variable ENERGYPLUS_WEATHER is not defined')
        sys.exit()

    rank = MPI.COMM_WORLD.Get_rank()
    if rank == 0:
        print('train: init logger with dir={}'.format(log_dir))  #XXX
        logger.configure(log_dir)
    else:
        logger.configure(format_strs=[])
        logger.set_level(logger.DISABLED)

    env = make_energyplus_env(env_id, workerseed)

    ac = env.action_space.sample()
    ob = env.reset()
    ac = np.array([-0.8, -0.8, 1.0, 1.0])
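    # Hand-coded rule-based controller (no learning): nudge the action
    # components whenever the monitored observations ob[1] and ob[2]
    # (presumably the two zone temperatures) drift outside the 23.4-23.6 band.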
    for _ in range(num_timesteps):

        if ob[1] > 23.6:
            ac[0] -= 0.01
            ac[2] += 0.05
        if ob[1] < 23.4:
            ac[0] += 0.01
            ac[2] -= 0.05

        if ob[2] > 23.6:
            ac[1] -= 0.01
            ac[3] += 0.05
        if ob[2] < 23.4:
            ac[1] += 0.01
            ac[3] -= 0.05

        ob, rew, done, _ = env.step(ac)

        #print(ob)

        if done:
            ob = env.reset()

    env.close()
Example #3
def train(env_id,
          num_timesteps,
          seed,
          learn=trpo_mpi.learn,
          policy_fn_class=MlpPolicy):
    import baselines.common.tf_util as U

    def policy_fn(name, ob_space, ac_space):
        return policy_fn_class(name=name,
                               ob_space=ob_space,
                               ac_space=ac_space,
                               hid_size=32,
                               num_hid_layers=2)

    sess = U.single_threaded_session()
    sess.__enter__()
    workerseed = seed + 10000 * MPI.COMM_WORLD.Get_rank()

    # Create a new base directory like /tmp/openai-2018-05-21-12-27-22-552435
    log_dir = os.path.join(
        energyplus_logbase_dir(),
        datetime.datetime.now().strftime("openai-%Y-%m-%d-%H-%M-%S-%f"))
    if not os.path.exists(log_dir + '/output'):
        os.makedirs(log_dir + '/output')
    os.environ["ENERGYPLUS_LOG"] = log_dir
    model = os.getenv('ENERGYPLUS_MODEL')
    if model is None:
        print('Environment variable ENERGYPLUS_MODEL is not defined')
        return
    weather = os.getenv('ENERGYPLUS_WEATHER')
    if weather is None:
        print('Environment variable ENERGYPLUS_WEATHER is not defined')
        return

    rank = MPI.COMM_WORLD.Get_rank()
    if rank == 0:
        print('train: init logger with dir={}'.format(log_dir))  # XXX
        logger.configure(log_dir)
    else:
        logger.configure(format_strs=[])
        logger.set_level(logger.DISABLED)

    env = make_energyplus_env(env_id, workerseed)

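    # Run the passed-in learner (TRPO from OpenAI baselines by default) on the
    # EnergyPlus environment with the hyperparameters below.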
    learn(
        env,
        policy_fn,
        max_timesteps=num_timesteps,
        # timesteps_per_batch=1*1024, max_kl=0.01, cg_iters=10, cg_damping=0.1,
        timesteps_per_batch=16 * 1024,
        max_kl=0.01,
        cg_iters=10,
        cg_damping=0.1,
        gamma=0.99,
        lam=0.98,
        vf_iters=5,
        vf_stepsize=1e-3)
    env.close()
Example #4
def train(env_id, num_timesteps, seed):
    # sess = util.single_threaded_session()
    # sess.__enter__()
    workerseed = seed + 10000 * MPI.COMM_WORLD.Get_rank()

    # Create a new base directory like /home/marco/Reinforcement_Learning/Logs/openai-2018-05-21-12-27

    log_dir = os.path.join(
        energyplus_logbase_dir(),
        datetime.datetime.now().strftime("openai-%Y-%m-%d-%H-%M"))
    if not os.path.exists(log_dir + '/output'):
        os.makedirs(log_dir + '/output')
    os.environ["ENERGYPLUS_LOG"] = log_dir
    model = os.getenv('ENERGYPLUS_MODEL')
    if model is None:
        print('Environment variable ENERGYPLUS_MODEL is not defined')
        sys.exit()
    weather = os.getenv('ENERGYPLUS_WEATHER')
    if weather is None:
        print('Environment variable ENERGYPLUS_WEATHER is not defined')
        sys.exit()

    # MPI is to parallelize training
    # Logs the training in a file log.txt in the given directory

    rank = MPI.COMM_WORLD.Get_rank()
    if rank == 0:
        print('train: init logger with dir={}'.format(log_dir))  # XXX
        logger.configure(log_dir)
    else:
        logger.configure(format_strs=[])
        logger.set_level(logger.DISABLED)

    # Make Gym environment:

    env = make_energyplus_env(env_id, workerseed)

    ###### EXPERIMENTS FROM FIRST PAPER: ###########################################
    #
    # trpo_mpi.learn(env,  policy_fn,
    #                max_timesteps=num_timesteps,
    #                timesteps_per_batch=16*1024, max_kl=0.01, cg_iters=10, cg_damping=0.1,
    #                gamma=0.99, lam=0.98, vf_iters=5, vf_stepsize=1e-3)

    # Apply TRPO algorithm from OpenAI baselines:

    # action_noise = NormalActionNoise(mean=np.zeros(4), sigma=0.1 * np.ones(4))
    #
    # policy_kwargs_tqc = dict(n_critics=2, n_quantiles=25)
    # model_tqc = TQC("MlpPolicy", env, top_quantiles_to_drop_per_net=2
    #                 , verbose=1, policy_kwargs=policy_kwargs_tqc)
    #
    # model_ppo = PPO('MlpPolicy', env, verbose=1, n_steps=4096, batch_size=64, n_epochs=15)
    # model_td3 = TD3('MlpPolicy', env, verbose=1, action_noise=action_noise)
    # model_sac = SAC('MlpPolicy', env, verbose=1)
    # model_ppolstm = PPO2(MlpLstmPolicy, env, verbose=1,n_steps=27, nminibatches=1)
    #
    # # Change the algorithm here:
    #
    # model_ppolstm.learn(total_timesteps=num_timesteps, log_interval=1, reset_num_timesteps=False)
    # # model_ppo.learning_rate = 0
    # # model_ppo.learn(total_timesteps=35040, reset_num_timesteps=False)
    #
    ##################################### EXPERIMENTS 2: ###################################

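    # Train with the SAC-v2 + LSTM implementation; the commented-out blocks above
    # keep the earlier TQC / PPO / TD3 / SAC / PPO-LSTM experiments for reference.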
    sac_v2_lstm(env, num_timesteps, train=True, test=False)
    #slac(env, num_timesteps)

    env.close()