def _init():
    # build a single environment instance and give it a rank-dependent seed
    env = environment(inputfile, gamma)
    env.seed(seed + rank)
    return env
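# For context, this factory follows the usual stable-baselines 2 recipe for seeding
# parallel environments. Below is a hedged sketch of the surrounding wiring; the names
# make_env and num_envs and the concrete inputfile/gamma values are assumptions taken
# from the commented defaults elsewhere in these snippets, not code from the original file.
from stable_baselines.common import set_global_seeds
from stable_baselines.common.vec_env import SubprocVecEnv
from OPenv_gym import environment

inputfile = "BM_easy6x6x4.xlsx"    # assumed value
gamma = 0.995                      # assumed value

def make_env(rank, seed=0):
    """Return a thunk that builds one seeded environment for SubprocVecEnv."""
    def _init():
        env = environment(inputfile, gamma)
        env.seed(seed + rank)
        return env
    set_global_seeds(seed)
    return _init

num_envs = 4                       # assumed number of worker processes
vec_env = SubprocVecEnv([make_env(rank=i) for i in range(num_envs)])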
Example #2
        model = Sequential()  # feed-forward policy network: state -> softmax over actions
        model.add(
            Dense(64,
                  input_dim=state_size,
                  activation='relu',
                  kernel_initializer='he_uniform'))
        model.add(Dropout(dropout))
        model.add(Dense(self.action_size, activation='softmax'))
        #model = Model(inputs=state_input, outputs=output)
        #model = Model(input=[state_input, advantage], output=[output])
        model.compile(optimizer=Adam(lr=LR_actor),
                      loss='categorical_crossentropy')

        return model
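# As an aside, a softmax head like the one built above is normally sampled from rather
# than arg-maxed. The helper below is a hedged illustration of that usage and is not part
# of the original agent; the name act and the call pattern are assumptions.
import numpy as np

def act(model, state, action_size):
    # the network expects a batch, so reshape the flat state to (1, state_size)
    probs = model.predict(state.reshape(1, -1), batch_size=1).flatten()
    # sample one of the discrete actions according to the policy probabilities
    return np.random.choice(action_size, p=probs)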

if __name__ == "__main__":
    env = environment(inputfile, gamma)
    state_size = env.flatlen               # length of the flattened observation
    action_size = env.Ilen * env.Jlen      # one discrete action per (I, J) grid cell
    agent = DQNAgent(state_size, action_size)
    agent.batch_size = env.turns           # one full episode per update for the on-policy algorithms

    #agent.load("model.h5")
    done = False

    episodelist = list()
    scorelist = list()
    output = list()
    e = 0
    #while time.time() < end:
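    # A hedged sketch of the wall-clock-bounded training loop that the comment above
    # points at; agent.act, agent.remember and agent.train are assumed method names and
    # are not confirmed by the original DQNAgent implementation.
    while time.time() < end:                # train until the wall-clock budget runs out
        state = env.reset()
        score = 0
        for t in range(env.turns):           # one pass through a full episode
            action = agent.act(state)                                  # assumed method
            next_state, reward, done, _ = env.step(action)
            agent.remember(state, action, reward, next_state, done)    # assumed method
            state = next_state
            score += reward
            if done:
                break
        agent.train()                         # assumed method: one on-policy update per episode
        e += 1
        episodelist.append(e)
        scorelist.append(score)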
# hyperparameters for this run, read from row `idx` of the input table
#LR_critic = inputarray.loc[idx].LR_critic
LR = inputarray.loc[idx].LR
batch_size = int(inputarray.loc[idx].batch_size)
#memcap = int(inputarray.loc[idx].memcap)
inputfile = inputarray.loc[idx].inputfile
gamma = inputarray.loc[idx].gamma
#dropout = float(inputarray.loc[idx].dropout)

start = time.time()
end = start + 11.5 * 60 * 60  # 11.5-hour wall-clock training budget
#inputfile="BM_easy6x6x4.xlsx"
#LR=0.000001
#gamma=0.995
#batch_size=32
#n_steps=5
inspectenv = environment(inputfile, gamma)

episodetimesteps = int(inspectenv.turns)

# decimal parts of LR/gamma and the spreadsheet stem, used to label output files
LR_s = str(LR).split('.')[1]
inputfile_s = inputfile.split('.')[0]
gamma_s = str(gamma).split('.')[1]
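# For illustration only: these fragments would typically be combined into a run label
# along the following lines; the exact format and the name run_name are assumptions,
# not code from the source (e.g. "BM_easy6x6x4_lr000001_g995" with the commented
# defaults above).
run_name = "{}_lr{}_g{}".format(inputfile_s, LR_s, gamma_s)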


class TimeLimit(BaseCallback):
    """
    Callback that stops training once a wall-clock time budget has been used up
    (compare ``end = start + 11.5 * 60 * 60`` above).
    """
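# A minimal sketch of how such a time-limit callback can be implemented with the
# stable-baselines 2 BaseCallback API; the constructor argument and attribute name
# used here (end) are assumptions, not taken from the original class.
class TimeLimitSketch(BaseCallback):
    def __init__(self, end, verbose=0):
        super(TimeLimitSketch, self).__init__(verbose)
        self.end = end                       # absolute deadline, as returned by time.time()

    def _on_step(self):
        # returning False from _on_step tells stable-baselines to stop training
        return time.time() < self.end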
Example #4
import time

from stable_baselines.common.policies import MlpPolicy
from stable_baselines.common.vec_env import SubprocVecEnv
from stable_baselines.common import set_global_seeds, make_vec_env
from stable_baselines.common.callbacks import BaseCallback, CallbackList, EvalCallback
from stable_baselines import A2C
from OPenv_gym import environment

start = time.time()
end = start + 2 * 60 * 60
inputfile = "BM_central15x15x5.xlsx"
LR = 0.001
LR2 = 0.000001
gamma = 0.95
batch_size = 64
#n_steps=5
inspectenv = environment(inputfile, gamma)
test = 'A2C'

episodetimesteps = int(inspectenv.turns)

eval_env = environment(inputfile, gamma, rendermode="on")
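# A hedged sketch (not in the original file) of the training run that the imports above
# are set up for and that would leave behind a checkpoint like the "best_model2" loaded
# below; n_envs, eval_freq, total_timesteps and the save path are assumed values.
train_env = make_vec_env(lambda: environment(inputfile, gamma), n_envs=4)
eval_callback = EvalCallback(eval_env,
                             best_model_save_path="./",     # EvalCallback writes best_model.zip here
                             eval_freq=episodetimesteps,
                             deterministic=True)
model = A2C(MlpPolicy, train_env, gamma=gamma, learning_rate=LR, verbose=1)
model.learn(total_timesteps=500000, callback=eval_callback)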

loaded_model = A2C.load("best_model2")
ob = eval_env.reset()
cum_reward = 0

for a in range(round(eval_env.flatlen * 0.5)):

    # predict() returns (action, hidden state); only the action is passed to step()
    action, _states = loaded_model.predict(ob)

    ob, reward, terminal, _ = eval_env.step(action)