import pickle
from keras.models import load_model

# Seed NumPy's global RNG so the sampled demand sequence is repeatable.
seed = 1234
np.random.seed(seed)


def demand():
    """Generate an endless sequence of non-negative integer demands.

    Each value is a single draw from ``np.random.normal(10, 5)``, truncated
    with ``int`` and floored at zero.
    """
    while True:
        sample = int(np.random.normal(10, 5))
        yield max(sample, 0)


demand_gen = demand()
#
# NOTE(review): in the flattened source it is ambiguous whether the
# ``env_exp3`` construction below was live or commented out; it is kept live
# here because the following statement uses it -- confirm.
env_exp3 = BeerGameEnv(demand_gen, lag=2)
api = env_exp3.start_play()
# Pull the first tuple from the play iterator; only ``state`` is used below.
state, r, d = next(api)
# 1-D shape (total element count) of one observation.
shape = np.array(state).ravel().shape
#
# agents_exp3 = [Agent(policy="ar")]
# agents_exp3.append(Agent())
# agents_exp3.append(DQN(state_shape=shape, n_action=25, net=simple_net))
# agents_exp3.append(Agent())
#
# bg_exp3 = chain_wrapper(agents_exp3, env_exp3)
# bg_exp3.play_mixed(episode=1000)
#
# plt.plot(np.array(bg_exp3.agents[0].cum_r) + np.array(bg_exp3.agents[1].cum_r)
#          + np.array(bg_exp3.agents[2].cum_r) + np.array(bg_exp3.agents[3].cum_r))
# plt.show()
#
import matplotlib.pyplot as plt
import pickle

# Module-level settings. Nothing in this section reads them; presumably they
# are consumed elsewhere in the file -- TODO confirm.
eps = 10
batch_size = 32


def demand():
    """Yield an endless stream of random demand values.

    Each value is a float drawn uniformly from the range between 1 and 10.
    """
    while True:
        # The previous call, ``np.random.uniform(10)``, bound low=10 and left
        # high at its default of 1.0. NumPy documents high < low as
        # officially undefined (it may raise in the future), so the same
        # 1-to-10 range is now written with the bounds in the right order.
        yield np.random.uniform(1, 10)


demand_gen = demand()

# Create the environment and advance its play iterator once to obtain an
# initial state; ``r`` and ``d`` are unpacked but not used in this section.
env = BeerGameEnv(demand_gen, lag=2)
api = env.start_play()
state, r, d = next(api)

# 1-D shape (total element count) of one observation, passed to every agent.
shape = np.array(state).flatten().shape

# Four DQN agents built with identical settings.
agents = [
    DQN(state_shape=shape, n_action=10, net=simple_net)
    for _ in range(4)
]

bg = chain_wrapper(agents, env)
bg.play_4dqn(episode=10000)