class DQN(BaseAgent):
    def __init__(self, model, processor, policy, test_policy, num_actions):
        # Replay memory
        memory = SequentialMemory(limit=opt.dqn_replay_memory_size,
                                  window_length=opt.dqn_window_length)
        self.agent = DQNAgent(model=model,
                              nb_actions=num_actions,
                              policy=policy,
                              test_policy=test_policy,
                              memory=memory,
                              processor=processor,
                              batch_size=opt.dqn_batch_size,
                              nb_steps_warmup=opt.dqn_nb_steps_warmup,
                              gamma=opt.dqn_gamma,
                              target_model_update=opt.dqn_target_model_update,
                              enable_double_dqn=opt.enable_double_dqn,
                              enable_dueling_network=opt.enable_dueling_network,
                              train_interval=opt.dqn_train_interval,
                              delta_clip=opt.dqn_delta_clip)
        self.agent.compile(optimizer=keras.optimizers.Adam(lr=opt.dqn_learning_rate),
                           metrics=['mae'])

    def fit(self, env, num_steps, weights_path=None, visualize=False):
        callbacks = []
        if weights_path is not None:
            callbacks += [ModelIntervalCheckpoint(weights_path, interval=50000, verbose=1)]
        self.agent.fit(env=env,
                       nb_steps=num_steps,
                       action_repetition=opt.dqn_action_repetition,
                       callbacks=callbacks,
                       log_interval=opt.log_interval,
                       test_interval=opt.test_interval,
                       test_nb_episodes=opt.test_nb_episodes,
                       test_action_repetition=opt.dqn_action_repetition,
                       visualize=visualize,
                       test_visualize=visualize,
                       verbose=1)

    def test(self, env, num_episodes, visualize=False):
        self.agent.test(env=env,
                        nb_episodes=num_episodes,
                        action_repetition=opt.dqn_action_repetition,
                        verbose=2,
                        visualize=visualize)

    def save(self, out_dir):
        self.agent.save_weights(out_dir, overwrite=True)

    def load(self, out_dir):
        self.agent.load_weights(out_dir)
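# A minimal usage sketch for the wrapper above; the `opt` config object, the model,
# the processor, and the env are placeholders assumed to be defined elsewhere in the repo.
agent = DQN(model=model, processor=processor,
            policy=LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1.,
                                        value_min=.1, value_test=.05, nb_steps=1000000),
            test_policy=GreedyQPolicy(),
            num_actions=env.action_space.n)
agent.fit(env, num_steps=1000000, weights_path='checkpoints/dqn_{step}.h5f')
agent.save('weights/dqn_final.h5f')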
log_filename = model_saves + filename_append + "_" + datestr + "_" + 'expert_' + environment_name + '_REWARD_DATA.txt'
callbacks = [
    TrainEpisodeLogger(log_filename),
    ModelIntervalCheckpoint(checkpoint_weights_filename, interval=1000000)
]

if args.mode == 'train':
    dqn.fit(env, callbacks=callbacks, nb_steps=4250000, verbose=0, nb_max_episode_steps=1500)
    dqn.save_weights(weights_filename, overwrite=True)

if args.mode == 'test':
    if args.weights_file:
        dqn.load_weights(args.weights_file)
    else:
        dqn.load_weights(model_saves + filename_append + "_" + datestr + "_" + 'expert_' + environment_name + '_weights.h5f')
    dqn.test(env, nb_episodes=100, visualize=False, verbose=2, nb_max_start_steps=30)

if args.mode == 'demonstrate':
    dqn.load_weights(model_saves + filename_append + "_" + datestr + "_" + 'expert_' + environment_name + '_weights.h5f')
    demonstrate(dqn, env, 75000, model_saves + demonstrations_file)
               memory=memory, nb_steps_warmup=10,
               target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=0.0015), metrics=['mae'])

""" Train for 150000 steps """
a = dqn.fit(env, nb_steps=150000, visualize=False, verbose=2)

""" Load weights; careful, this wipes out the current training """
weights_filename = 'dqn64_{}_weights.h5f'.format('LunarLander-v2')
dqn.load_weights(weights_filename)

""" Test for 20 episodes """
dqn.test(env, nb_episodes=20, visualize=False)

import matplotlib.pyplot as plt
plt.plot([199.09, 217.98, 233.922, 225.90, 220.99,
          245.82, 236.89, 262.95, 221.20, 241.72], label='Chosen')
plt.plot([235.44, 244.76, -94.505, 248.86, 265.25,
          228.75, 202.80, 256.86, 239.59, -85.32
def training_game():
    env = Environment()

    input_shape = (FLAGS.screen_size, FLAGS.screen_size, 1)
    nb_actions = 12  # Number of actions

    model = neural_network_model(input_shape, nb_actions)
    memory = SequentialMemory(limit=3500, window_length=_WINDOW_LENGTH)

    processor = SC2Proc()

    # Policy
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr="eps", value_max=1,
                                  value_min=0.7, value_test=.0, nb_steps=GLOBAL_STEPS)

    # Agent
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory,
                   enable_double_dqn=False, nb_steps_warmup=GLOBAL_STEPS_WARMUP,
                   target_model_update=1e-2, policy=policy, batch_size=150,
                   processor=processor)
    dqn.compile(Adam(lr=.001), metrics=["mae"])

    # Tensorboard callback
    callbacks = keras.callbacks.TensorBoard(log_dir='./Graph', histogram_freq=0,
                                            write_graph=True, write_images=False)

    # Save the parameters and upload them when needed
    name = FLAGS.mini_game
    w_file = "dqn_{}_weights.h5f".format(name)
    check_w_file = "train_w" + name + "_weights.h5f"
    if SAVE_MODEL:
        check_w_file = "train_w" + name + "_weights_{step}.h5f"
    log_file = "training_w_{}_log.json".format(name)

    if LOAD_MODEL:
        dqn.load_weights(w_file)

    # dqn.fit(env, callbacks=callbacks, nb_steps=GLOBAL_STEPS, action_repetition=2, log_interval=1e4, verbose=2)
    dqn.fit(env, nb_steps=GLOBAL_STEPS, action_repetition=2, log_interval=1000, verbose=2)

    dqn.save_weights(w_file, overwrite=True)
    dqn.test(env, action_repetition=2, nb_episodes=30, visualize=False)
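# SC2Proc is referenced above but not defined in this snippet. A minimal sketch of what
# such a keras-rl Processor might look like; the class body here is an assumption, not
# the original implementation.
from rl.core import Processor
import numpy as np

class SC2Proc(Processor):
    def process_observation(self, observation):
        # Assumed: reduce the PySC2 observation to a single uint8 screen layer.
        return np.array(observation, dtype='uint8')

    def process_reward(self, reward):
        # Clip rewards to [-1, 1], a common DQN stabilization trick.
        return np.clip(reward, -1., 1.)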
               delta_clip=1., nb_steps_warmup=50000)
lr = .00025
dqn.compile(Adam(lr), metrics=['mae'])

weights_filename = model_saves + 'expert_lander_weights.h5f'
checkpoint_weights_filename = model_saves + 'expert_lander_weights{step}.h5f'
log_filename = model_saves + 'expert_lander_REWARD_DATA.txt'
callbacks = [
    TrainEpisodeLogger(log_filename),
    ModelIntervalCheckpoint(checkpoint_weights_filename, interval=1000000)
]

if args.mode == 'train':
    dqn.fit(env, callbacks=callbacks, nb_steps=4250000, verbose=0, nb_max_episode_steps=1500)
    dqn.save_weights(weights_filename, overwrite=True)

if args.mode == 'test':
    dqn.load_weights(model_saves + 'expert_lander_weights.h5f')
    dqn.test(env, nb_episodes=5, visualize=True, verbose=2, nb_max_start_steps=30)

if args.mode == 'demonstrate':
    dqn.load_weights(model_saves + 'expert_lander_weights.h5f')
    demonstrate(dqn, env, 75000, model_saves + 'demos.npy')
model = keras.layers.Flatten()(model)
model = keras.layers.Dense(512, activation='relu')(model)
model = keras.layers.Dense(4, activation='linear')(model)
model = keras.Model(inputs=input, outputs=model)
model.summary()

print(model.output)
# Workaround: keras-rl inspects `_keras_shape` on the output tensor, which some Keras
# versions no longer set; patch it in manually so the agent's shape check passes.
model.output._keras_shape = (None, 4)
print(model.output._keras_shape)

game = gym.make('Breakout-v0')
agent = DQNAgent(model, policy, nb_actions=game.action_space.n, nb_steps_warmup=50000,
                 memory=memory, processor=AtariProcessor(), train_interval=4, delta_clip=1.)
agent.compile(keras.optimizers.Adam(lr=.00025), metrics=['mae'])

callbacks = [rl.callbacks.ModelIntervalCheckpoint('ckpt.h5f', interval=250000)]
callbacks += [FileLogger('log.json', interval=100)]

if False:  # flip to True to resume from saved weights
    agent.load_weights('weights.h5f')

agent.fit(game, nb_steps=1750000, visualize=False, log_interval=10000, callbacks=callbacks)
agent.save_weights('weights.h5f', overwrite=True)

game.reset()
agent.test(game, nb_episodes=10, visualize=True)
nb_actions = env.action_space.n
input_shape = env.observation_space.shape
window = 4

model = create_(nb_actions)
memory = SequentialMemory(limit=1000000, window_length=4)
processor = AtariProcessor()
policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1.,
                              value_min=.1, value_test=.05, nb_steps=1000000)
dqn = DQNAgent(model=model, nb_actions=nb_actions, policy=policy, memory=memory,
               processor=processor, nb_steps_warmup=50000, gamma=.99,
               target_model_update=10000, train_interval=4, delta_clip=1.)
dqn.compile(Adam(lr=.00025), metrics=['mae'])

dqn.load_weights('policy.h5')
dqn.test(env, nb_episodes=10, visualize=False)
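# AtariProcessor is used here (and in the Breakout snippet above) but never defined.
# A minimal sketch in the style of the canonical keras-rl dqn_atari example; the 84x84
# frame size is an assumption.
from PIL import Image
import numpy as np
from rl.core import Processor

INPUT_SHAPE = (84, 84)

class AtariProcessor(Processor):
    def process_observation(self, observation):
        img = Image.fromarray(observation)
        img = img.resize(INPUT_SHAPE).convert('L')  # resize and convert to grayscale
        return np.array(img).astype('uint8')        # store compactly in replay memory

    def process_state_batch(self, batch):
        return batch.astype('float32') / 255.       # rescale lazily, at training time

    def process_reward(self, reward):
        return np.clip(reward, -1., 1.)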
input_dim = env.input_dim

model = Sequential()
model.add(Flatten(input_shape=(1, input_dim)))  # window_length=1, so states are (1, input_dim)
model.add(Dense(256, activation='relu'))
model.add(Dense(nb_actions, activation='linear'))

memory = SequentialMemory(limit=2000, window_length=1)
policy = EpsGreedyQPolicy()
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,
               target_model_update=1e-2, policy=policy, gamma=0.99)
dqn.compile(Adam(lr=0.001, epsilon=0.05, decay=0.0), metrics=['mae'])

# history = dqn.fit(env, nb_steps=100, action_repetition=1, visualize=False, verbose=2)
# dqn.save_weights('dqn_weights_%s.h5f' % (100), overwrite=True)
dqn.load_weights('dqn_weights_%s.h5f' % (3000))

# for perc_av in percent_av:
perc_av = 1

print('Fleet size is {f}'.format(f=fleet_size))
print('Surge is {}'.format(surge))
print('Percentage knowing fares is {}'.format(perc_k))
print('Percentage of professional drivers {}'.format(pro_s))

m = Model(ZONE_IDS, DEMAND_SOURCE, WARMUP_TIME_HOUR, ANALYSIS_TIME_HOUR,
          FLEET_SIZE=fleet_size, PRO_SHARE=pro_s, SURGE_MULTIPLIER=surge, BONUS=bonus,
          percent_false_demand=percent_false_demand,
          percentage_know_fare=perc_k,
# callbacks += [ModelCheckpoint(model_filename)]
callbacks += [FileLogger(log_filename, interval=250000)]
callbacks += [TensorBoard(log_dir=run_id)]

# class TestCallback(Callback):
#     def on_epoch_end(self, epoch, logs=None):
#         test_env = gym.make(args.env_name)
#         test_env.setMapSize(MAP_X, MAP_Y)
#         dqn.test(test_env, nb_episodes=1, visualize=True, nb_max_start_steps=100)
#         test_env.win1.destroy()
#         test_env.close()
#         del test_env
# callbacks += [TestCallback()]

# if args.loadmodel:
#     dqn.model.load(args.loadmodel)
if args.weights:
    dqn.load_weights(args.weights)

dqn.fit(env, callbacks=callbacks, nb_steps=1750000, log_interval=10000)

# After training is done, we save the final weights one more time.
dqn.save_weights(weights_filename, overwrite=True)
# dqn.save_model(model_filename)

# Finally, evaluate our algorithm for 10 episodes.
dqn.test(env, nb_episodes=10, visualize=True)
# gtk.main()
elif args.mode == 'test':
    weights_filename = 'dqn_{}_weights.h5f'.format(args.env_name)
    if args.weights:
               target_model_update=1e-2, policy=policy, gamma=.98)
dqn.compile(Adam(lr=0.00025), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
log_filename = 'dqn_{}_log.json'.format(args.env_name)
callbacks = [FileLogger(log_filename, interval=1)]
dqn.fit(env, callbacks=callbacks, nb_steps=150000, visualize=False, verbose=2)

# After training is done, we save the final weights.
dqn.save_weights('dqn_{}_weights.h5f'.format(args.env_name), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
# dqn.test(env, nb_episodes=5, visualize=False)
else:
    dqn.load_weights('dqn_{}_weights.h5f'.format(args.env_name))
    dqn.test(env, nb_episodes=10, visualize=False)
# model.add(Dense(16))
# model.add(Activation('relu'))
# model.add(Dense(16))
# model.add(Activation('relu'))
# model.add(Dense(nb_actions))
# model.add(Activation('linear'))

memory = SequentialMemory(limit=50000, window_length=1)
policy = BoltzmannQPolicy()
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10,
               target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# history = dqn.fit(env, nb_steps=50000, visualize=True, verbose=2, nb_max_episode_steps=1000)
# dqn.save_weights(os.path.join("model", "dqn_multi_cls_{}_weights.h5f".format(ENV_NAME)), overwrite=True)
dqn.load_weights(
    os.path.join(
        "model",
        "dqn_multi_cls_ralenv_multi_classify_{}-v0_weights.h5f".format(args[1])))

# env_1.seed(123)
env_1.weight_num = args[1]
dqn.test(env_1, nb_episodes=5, visualize=True)

# env_2.seed(123)
env_2.weight_num = args[1]
dqn.test(env_2, nb_episodes=5, visualize=True)
trade_cost = 0.03
env.init_file(output_file, feature_list, trade_cost, False)

model = create_model(env)
memory = SequentialMemory(limit=5000, window_length=1)
policy = GreedyQPolicy()
dqn = DQNAgent(model=model, nb_actions=env.action_size, memory=memory,
               nb_steps_warmup=50, target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mse'])
dqn.load_weights(w_file_name)

dqn.test(env, nb_episodes=1, action_repetition=1, callbacks=None, visualize=True,
         nb_max_episode_steps=None, nb_max_start_steps=0, start_step_policy=None,
         verbose=1)

fig = plt.figure()
gs = gridspec.GridSpec(2, 1, figure=fig)
ax1 = fig.add_subplot(gs[0, 0])
ax1.plot(env.df['close'], '-b', linewidth=0.5)
np.random.seed(123)
env.seed(123)
nb_actions = env.action_space.n

model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
model.add(Dense(NODES))
model.add(PReLU())
model.add(Dense(NODES * 2))
model.add(PReLU())
model.add(Dense(NODES * 4))
model.add(PReLU())
model.add(Dense(NODES * 2))
model.add(PReLU())
model.add(Dense(nb_actions))
model.add(Activation('linear'))

memory = SequentialMemory(limit=memoria, window_length=1)
policy = EpsGreedyQPolicy()
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10,
               batch_size=batch_size, target_model_update=1e-2, policy=policy,
               enable_double_dqn=True)
dqn.compile(Adam(lr=learning_rate), metrics=['mae'])

if not teste:
    dqn.fit(env, nb_steps=epocas, visualize=False, verbose=1)
    dqn.save_weights('dqn_weights.h5f', overwrite=True)
else:
    dqn.load_weights('dqn_weights_1.h5f')
    dqn.test(env, nb_episodes=50, visualize=False)
                target_model_update=1e-2, policy=policy, enable_double_dqn=False)
dqn4 = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=1000,
                target_model_update=1e-2, enable_double_dqn=False)

# Pass optimizer instances, not the SGD class itself.
dqn1.compile(SGD(), metrics=['mae'])
dqn2.compile(SGD(), metrics=['mae'])
dqn3.compile(SGD(), metrics=['mae'])
dqn4.compile(SGD(), metrics=['mae'])

dqn1.load_weights('save/dqn1_{}_weights.h5f'.format(ENV_NAME))
dqn2.load_weights('save/dqn2_{}_weights.h5f'.format(ENV_NAME))
dqn3.load_weights('save/dqn3_{}_weights.h5f'.format(ENV_NAME))
dqn4.load_weights('save/dqn4_{}_weights.h5f'.format(ENV_NAME))
print('Weights loaded!')

test1 = dqn1.test(env, nb_episodes=50, visualize=True)
test2 = dqn2.test(env, nb_episodes=50, visualize=True)
test3 = dqn3.test(env, nb_episodes=50, visualize=True)
test4 = dqn4.test(env, nb_episodes=50, visualize=True)

# pyplot.subplot(2, 1, 1)
# pyplot.plot(test1.history['episode_reward'], 'r--', test2.history['episode_reward'], 'g',
#             test3.history['episode_reward'], 'b--', test4.history['episode_reward'], 'y')
'''
with open('save/history1_2018-06-08 14:53:59', 'r') as f:
    pp1_1 = json.load(f)
def training_game():
    env = Environment(
        map_name="HallucinIce",
        visualize=True,
        game_steps_per_episode=150,
        agent_interface_format=features.AgentInterfaceFormat(
            feature_dimensions=features.Dimensions(screen=64, minimap=32)))

    input_shape = (_SIZE, _SIZE, 1)
    nb_actions = 12  # Number of actions

    model = neural_network_model(input_shape, nb_actions)
    memory = SequentialMemory(limit=5000, window_length=_WINDOW_LENGTH)

    processor = SC2Proc()

    # Policy
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr="eps", value_max=1,
                                  value_min=0.2, value_test=.0, nb_steps=1e2)

    # Agent
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory,
                   enable_double_dqn=True, enable_dueling_network=True,
                   nb_steps_warmup=500, target_model_update=1e-2, policy=policy,
                   batch_size=150, processor=processor, delta_clip=1)
    dqn.compile(Adam(lr=.001), metrics=["mae", "acc"])

    # Tensorboard callback
    callbacks = keras.callbacks.TensorBoard(log_dir='./Graph', histogram_freq=0,
                                            write_graph=True, write_images=False)

    # Save the parameters and upload them when needed
    name = "HallDebbugeed"
    w_file = "dqn_{}_weights.h5f".format(name)
    check_w_file = "train_w" + name + "_weights.h5f"
    if SAVE_MODEL:
        check_w_file = "train_w" + name + "_weights_{step}.h5f"
    log_file = "training_w_{}_log.json".format(name)

    if LOAD_MODEL:
        dqn.load_weights(w_file)

    dqn.fit(env, callbacks=[callbacks], nb_steps=1e7, action_repetition=2,
            log_interval=1e4, verbose=2)
    dqn.save_weights(w_file, overwrite=True)
    dqn.test(env, action_repetition=2, nb_episodes=30, visualize=False)
print(model.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=NUM_STEPS, window_length=1)
# train_policy = BoltzmannQPolicy(tau=0.05)
train_policy = EpsGreedyQPolicy()
test_policy = GreedyQPolicy()

if DUEL_DQN:
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,
                   enable_dueling_network=True, dueling_type='avg',
                   target_model_update=1e-2, policy=train_policy, test_policy=test_policy)
    filename = 'weights/duel_dqn_{}_weights_{}_{}_{}_{}.h5f'.format(
        ENV_NAME, LAYER_SIZE, NUM_HIDDEN_LAYERS, NUM_STEPS, TRIAL_ID)
else:
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,
                   target_model_update=1e-2, policy=train_policy, test_policy=test_policy)
    filename = 'weights/dqn_{}_weights_{}_{}_{}_{}.h5f'.format(
        ENV_NAME, LAYER_SIZE, NUM_HIDDEN_LAYERS, NUM_STEPS, TRIAL_ID)

dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Load the model weights
dqn.load_weights(FILENAME)

# dqn.fit(env, nb_steps=1000, visualize=False, verbose=1, nb_max_episode_steps=500)

# Finally, evaluate our algorithm for 5 episodes.
dqn.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=500)
    re = []
    for key in dc:
        re.append(dc[key])
    return re

tt = dict_to_list(tpl.rewards_mean)
mm = np.array(tt[:-1])
kk = dict_to_list(tpl.metrics_at_end)
jj = np.array(kk[:-1])
metrics = np.column_stack((mm, jj))

import pickle
pickle.dump(metrics, open('duel_dqn_%d_%s_metrics.p' % (scale, ENV_NAME), "wb"))

# load model for testing
dqn.load_weights('/home/am/Desktop/set_tests/final/duel_dqn_%d_%s_weights.h5f' % (scale, ENV_NAME))

# setting up monitoring tools to record the testing episodes
from gym import monitoring
from gym.wrappers import Monitor

def episode5(episode_id):
    # Record only the first episode.
    return episode_id < 1

# rec = StatsRecorder(env, "sarsa_1")
# rec.capture_frame()
temp = '/home/am/Desktop/set_tests/final/duel_dqn_%d_%s' % (scale, ENV_NAME)
env = Monitor(env, temp, force=True, video_callable=episode5)
                 test_policy=test_policy)
    agent.compile(Adam(lr=1e-3, clipnorm=1.0), metrics=['mae'])
elif METHOD.upper() == 'SARSA':
    # SARSA does not require a memory.
    agent = SarsaAgent(model=model, nb_actions=nb_actions, nb_steps_warmup=10,
                       policy=train_policy)
    agent.compile(Adam(lr=1e-3, clipnorm=1.0), metrics=['mae'])
elif METHOD.upper() == 'CEM':
    memory = EpisodeParameterMemory(limit=1000, window_length=1)
    agent = CEMAgent(model=model, nb_actions=nb_actions, memory=memory, batch_size=50,
                     nb_steps_warmup=2000, train_interval=50, elite_frac=0.05)
    agent.compile()
else:
    raise ValueError('Please select DQN, DUEL_DQN, SARSA, or CEM for your method type.')

# Load the model weights
agent.load_weights(WEIGHT_FILENAME)

# Finally, evaluate our algorithm for 5 episodes.
agent.test(env, nb_episodes=5, visualize=True, action_repetition=5)  # , nb_max_episode_steps=500)
def main(shape=10, winsize=4, test=False, num_max_test=200):
    INPUT_SHAPE = (shape, shape)
    WINDOW_LENGTH = winsize

    class SnakeProcessor(Processor):
        def process_observation(self, observation):
            # assert observation.ndim == 1, str(observation.shape)  # (height, width, channel)
            assert observation.shape == INPUT_SHAPE
            return observation.astype('uint8')  # saves storage in experience memory

        def process_state_batch(self, batch):
            # We could perform this processing step in `process_observation`. In this case, however,
            # we would need to store a `float32` array instead, which is 4x more memory intensive
            # than a `uint8` array. This matters if we store 1M observations.
            processed_batch = batch.astype('float32') / 255.
            return processed_batch

        def process_reward(self, reward):
            return reward

    env = gym.make('snakenv-v0')
    np.random.seed(123)
    env.seed(123)

    input_shape = (WINDOW_LENGTH,) + INPUT_SHAPE
    model = make_model(input_shape, 5)

    memory = SequentialMemory(limit=100000, window_length=WINDOW_LENGTH)
    processor = SnakeProcessor()

    # policy = LinearAnnealedPolicy(
    #     EpsGreedyQPolicy(), attr='eps', value_max=1., value_min=.1,
    #     value_test=0, nb_steps=500000)
    policy = BoltzmannQPolicy()
    interval = 20000

    dqn = DQNAgent(model=model, nb_actions=5, policy=policy, memory=memory,
                   processor=processor, nb_steps_warmup=20000, gamma=.99,
                   target_model_update=interval, train_interval=4, delta_clip=1.)
    dqn.compile(Adam(lr=0.0005), metrics=['mae'])

    weights_filename = 'dqn_snake_weights.h5f'

    if not test:
        # Okay, now it's time to learn something! We capture the interrupt exception so that training
        # can be prematurely aborted. Notice that now you can use the built-in Keras callbacks!
        weights_filename = 'dqn_{}_weights.h5f'.format('snake')
        checkpoint_weights_filename = 'dqn_' + 'snake' + '_weights_{step}.h5f'
        log_filename = 'dqn_{}_log.json'.format('snake')
        callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=interval)]
        callbacks += [ModelIntervalCheckpoint(weights_filename, interval=interval)]
        callbacks += [FileLogger(log_filename, interval=500)]
        dqn.fit(env, callbacks=callbacks, nb_steps=10000000,
                log_interval=10000, visualize=False)

        # After training is done, we save the final weights one more time.
        # dqn.save_weights(weights_filename, overwrite=True)

        # Finally, evaluate our algorithm for 10 episodes.
        # dqn.test(env, nb_episodes=10, visualize=True, nb_max_episode_steps=100)
    else:
        while True:
            try:
                dqn.load_weights(weights_filename)
            except Exception:
                # Weights are written by the training process; wait and retry rather
                # than testing an uninitialized network.
                print("weights not found, waiting")
                time.sleep(5)
                continue
            dqn.test(env, nb_episodes=3, visualize=True,
                     nb_max_episode_steps=num_max_test)
            time.sleep(5)
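# The uint8-versus-float32 tradeoff described in SnakeProcessor's comment is easy to
# quantify. A back-of-the-envelope sketch for the settings above (10x10 observations,
# 100000-entry memory):
limit, h, w = 100000, 10, 10
uint8_bytes = limit * h * w       # 1 byte per pixel  -> ~10 MB
float32_bytes = uint8_bytes * 4   # 4 bytes per pixel -> ~40 MB
print(uint8_bytes / 1e6, "MB as uint8;", float32_bytes / 1e6, "MB as float32")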
model = model_from_json(loaded_model_json)
print(model.summary())

# Unclear whether the declarations below are needed just to run a post-training test;
# set up the dqn the same way as during training to be safe.
memory = SequentialMemory(limit=2000000, window_length=1)
policy = EpsGreedyQPolicy(eps=0.1)
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,
               target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=0.001))

# Load the weights
dqn.load_weights(sys.argv[4])

cb_ep = EpisodeLogger()

# Run the test: have the agent trade through the whole database.
# This takes a while, so it may be worth printing progress to the console
# inside the step method.
dqn.test(env, nb_episodes=1, visualize=False, callbacks=[cb_ep])

# Visualize the results
print("COUNT BUY : " + str(list(cb_ep.actions.values())[0].count(0)))
print("COUNT SELL : " + str(list(cb_ep.actions.values())[0].count(1)))
print("COUNT STAY : " + str(list(cb_ep.actions.values())[0].count(2)))

plt.subplot(211)
plt.plot(env.get_midprice_list(), linewidth=0.1)
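# EpisodeLogger is referenced above but not defined in this snippet. A minimal sketch of
# a keras-rl callback that records the action taken at every step, keyed by episode; the
# `actions` attribute matches how it is read above, but the body is a reconstruction.
from rl.callbacks import Callback

class EpisodeLogger(Callback):
    def __init__(self):
        self.actions = {}

    def on_episode_begin(self, episode, logs={}):
        self.actions[episode] = []

    def on_step_end(self, step, logs={}):
        self.actions[logs['episode']].append(logs['action'])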
print(model.summary())

# Finally, we configure and compile our agent. You can use every built-in tensorflow.keras
# optimizer and even the metrics!
memory = SequentialMemory(limit=50000, window_length=1)
policy = BoltzmannQPolicy()
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10,
               target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
history = dqn.fit(env, nb_steps=2000, visualize=False, verbose=2)  # , callbacks=[WandbLogger()]

# After training is done, we save the final weights.
dqn.save_weights('weights/dqn_{}_weights.h5'.format(ENV_NAME), overwrite=True)
dqn.load_weights('weights/dqn_{}_weights.h5'.format(ENV_NAME))

# Finally, evaluate our algorithm for 5 episodes.
history_test = dqn.test(env, nb_episodes=5, visualize=False)
class DDQN:
    def __init__(
        self,
        env,
        name,
        memory_limit=10000,
        nb_eps=10000,
        nb_warmup=100,
        dueling=True,
        double=True,
    ):
        # Set a fixed seed for the environment
        self.env = env
        self.env.seed(123)
        np.random.seed(123)
        random.seed(123)

        self.name = name
        self.log_filename = "./logs/{}_log.json".format(self.name)
        self.weights_filename = "./results/{}_weights.h5f".format(self.name)
        self.result_filename = "./results/{}_result.csv".format(self.name)

        # Extract the number of actions from the environment
        nb_action = self.env.action_space.spaces[0].n
        nb_actions = nb_action ** len(self.env.action_space.spaces)
        nb_states = self.env.observation_space.shape

        # Next, we build a very simple model.
        model = self._build_nn(nb_states, nb_actions)

        # Next, we define the replay memory
        memory = SequentialMemory(limit=memory_limit, window_length=1)
        policy = LinearAnnealedPolicy(
            EpsGreedyQPolicy(),
            attr="eps",
            nb_steps=nb_eps,
            value_max=1.0,   # Start fully random
            value_min=0.1,   # After nb_steps, arrive at 10% random
            value_test=0.0,  # Don't pick random actions when testing
        )

        # Configure and compile our agent:
        # You can use every built-in Keras optimizer and even the metrics!
        self.dqn = DQNAgent(
            model=model,
            nb_actions=nb_actions,
            memory=memory,
            nb_steps_warmup=nb_warmup,
            enable_dueling_network=dueling,  # Enable dueling network
            dueling_type="avg",
            enable_double_dqn=double,  # Enable double DQN
            target_model_update=1e-2,
            policy=policy,
        )
        self.dqn.compile(Adam(lr=1e-2), metrics=["mae"])

    def _build_nn(self, nb_states, nb_actions):
        model = Sequential()
        model.add(Flatten(input_shape=(1,) + nb_states))
        model.add(Dense(16))
        model.add(Activation("relu"))
        model.add(Dense(16))
        model.add(Activation("relu"))
        model.add(Dense(16))
        model.add(Activation("relu"))
        model.add(Dense(nb_actions))
        model.add(Activation("linear"))
        return model

    def run(self, steps):
        callbacks = [FileLogger(self.log_filename)]
        self.dqn.fit(
            self.env,
            callbacks=callbacks,
            nb_steps=steps,
            visualize=False,
            verbose=1,
            log_interval=10000,
        )
        # After training is done, we save the final weights.
        self.dqn.save_weights(self.weights_filename, overwrite=True)

    def test(self):
        self.dqn.load_weights(self.weights_filename)
        self.dqn.test(self.env, nb_episodes=1, visualize=False)
        self.env.save_results(self.result_filename)
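# A usage sketch for the DDQN wrapper above, assuming an environment with a Tuple action
# space and a save_results() method; make_my_env() is a placeholder constructor.
env = make_my_env()
agent = DDQN(env, name="experiment_1", nb_eps=10000)
agent.run(steps=50000)  # train, then write ./results/experiment_1_weights.h5f
agent.test()            # reload the weights, run one episode, save the CSV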
                 nb_actions=n_actions,
                 policy=policy,
                 memory=memory,
                 nb_steps_warmup=args.warmup_steps,
                 gamma=.99,
                 target_model_update=args.target_model_update,
                 train_interval=args.train_interval,
                 delta_clip=1.,
                 enable_dueling_network=True)
agent.compile(Adam(lr=args.learning_rate), metrics=['mae'])

if args.load_weights_from is not None:
    print(f"Loading Weights From: {args.load_weights_from}")
    weights_filename = f'{args.load_weights_from}/' + 'dqn_{}_weights.h5f'.format(env_name)
    agent.load_weights(weights_filename)

if args.mode == 'train':
    import os
    current_directory = os.getcwd()
    model_weight_dir = os.path.join(current_directory, MODEL_NAME)
    if not os.path.exists(model_weight_dir):
        os.makedirs(model_weight_dir)

    weights_filename = f'{MODEL_NAME}/dqn_{env_name}_weights.h5f'
    checkpoint_weights_filename = f'{MODEL_NAME}/dqn_' + env_name + '_weights_{step}.h5f'
    log_filename = f'{MODEL_NAME}/' + 'dqn_{}_log.json'.format(env_name)
    callbacks = [
        ModelIntervalCheckpoint(checkpoint_weights_filename, interval=100000),
        FileLogger(log_filename, interval=100),
        TensorboardCallback(log_dir=tb_logs)
def training_game():
    env = Environment(
        map_name="ForceField",
        visualize=True,
        game_steps_per_episode=150,
        agent_interface_format=features.AgentInterfaceFormat(
            feature_dimensions=features.Dimensions(screen=64, minimap=32)))

    input_shape = (_SIZE, _SIZE, 1)
    nb_actions = 12  # Number of actions

    model = neural_network_model(input_shape, nb_actions)
    memory = SequentialMemory(limit=5000, window_length=_WINDOW_LENGTH)

    processor = SC2Proc()

    # Policy
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr="eps", value_max=1,
                                  value_min=0.2, value_test=.0, nb_steps=1e2)

    # Agent
    dqn = DQNAgent(
        model=model,
        nb_actions=nb_actions,
        memory=memory,
        enable_double_dqn=True,
        enable_dueling_network=True,
        # 2019-07-12 GU Zhan (Sam) when value shape problem, reduce nb_steps_warmup:
        # nb_steps_warmup=300, target_model_update=1e-2, policy=policy,
        nb_steps_warmup=500, target_model_update=1e-2, policy=policy,
        batch_size=150,
        processor=processor,
        delta_clip=1)
    dqn.compile(Adam(lr=.001), metrics=["mae", "acc"])

    # Tensorboard callback
    timestamp = f"{datetime.datetime.now():%Y-%m-%d %I:%M%p}"
    # 2019-07-12 GU Zhan (Sam) folder name for Linux:
    # callbacks = keras.callbacks.TensorBoard(log_dir='./Graph/' + timestamp, histogram_freq=0,
    #                                         write_graph=True, write_images=False)
    # 2019-07-12 GU Zhan (Sam) folder name for Windows (raw string avoids escape issues):
    callbacks = keras.callbacks.TensorBoard(log_dir=r'.\Graph\issgz', histogram_freq=0,
                                            write_graph=True, write_images=False)

    # Save the parameters and upload them when needed
    name = "agent"
    w_file = "dqn_{}_weights.h5f".format(name)
    check_w_file = "train_w" + name + "_weights.h5f"
    if SAVE_MODEL:
        check_w_file = "train_w" + name + "_weights_{step}.h5f"
    log_file = "training_w_{}_log.json".format(name)

    if LOAD_MODEL:
        dqn.load_weights(w_file)

    class Saver(Callback):
        def on_episode_end(self, episode, logs={}):
            if episode % 200 == 0:
                self.model.save_weights(w_file, overwrite=True)

    s = Saver()
    logs = FileLogger('DQN_Agent_log.csv', interval=1)

    dqn.fit(env, callbacks=[callbacks, s, logs], nb_steps=600,
            action_repetition=2, log_interval=1e4, verbose=2)
    dqn.save_weights(w_file, overwrite=True)
    dqn.test(env, action_repetition=2, nb_episodes=30, visualize=False)
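# keras-rl ships a step-based alternative to the episode-based Saver above; a sketch
# (the filename pattern and interval here are illustrative, not from the original code):
from rl.callbacks import ModelIntervalCheckpoint

checkpoint = ModelIntervalCheckpoint('dqn_agent_weights_{step}.h5f',
                                     interval=10000, verbose=1)
# dqn.fit(env, callbacks=[checkpoint, logs], nb_steps=600, ...)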
# even the metrics!
memory = SequentialMemory(limit=50000, window_length=1)
policy = BoltzmannQPolicy()
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10,
               target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
dqn.load_weights('dqn_{}_weights.h5f'.format(ENV_NAME))

import threading

def thread_gen(n):
    env = gym.make('CartPole-v0')
    actions = []
    for i in range(50):
        obs = env.reset()
        for j in range(200):
            t = env.render()
            act = dqn.forward(obs)  # Raw image
model.add(Activation('linear'))
# model.summary()

memory = SequentialMemory(limit=env.dataLength, window_length=WINDOW_SIZE)
policy = BoltzmannQPolicy()
dqn = DQNAgent(model=model, nb_actions=env.action_space.n, memory=memory,
               nb_steps_warmup=100, target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=0.001), metrics=['mae'])

if os.path.exists(WEIGHTS_NAME):
    dqn.load_weights(WEIGHTS_NAME)
    print("saved weight loaded")

def getPredictionAt(index=0):
    state = env.getLatestState(index, window=10)
    state = np.reshape(state, (-1, 1))
    state = np.expand_dims(state, axis=0)
    prediction = model.predict(state)[0]
    index_of_maximum = np.where(prediction == np.max(prediction))
    return index_of_maximum[0]

# dqn.test(env, nb_episodes=1, visualize=False)
prediction = getPredictionAt(0)
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
model.summary()

train_mode = len(sys.argv) > 1 and sys.argv[1] == 'train'

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=20000, window_length=1)
policy = BoltzmannQPolicy()
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10,
               target_model_update=1e-2, policy=policy,
               enable_dueling_network=True, dueling_type='avg')
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

if os.path.isfile(WEIGHTS_PATH) and os.access(WEIGHTS_PATH, os.R_OK):
    dqn.load_weights(WEIGHTS_PATH)

if train_mode:
    dqn.fit(env, nb_steps=50000, visualize=False, verbose=2)
    dqn.save_weights(WEIGHTS_PATH, overwrite=True)
    print('save')
else:
    # Finally, evaluate our algorithm for 5 episodes.
    dqn.test(env, nb_episodes=5, visualize=True)
random.seed(123)
np.random.seed(123)
env.seed(123)

PREPROC = "onehot2steps"
processor = OneHotNNInputProcessor(num_one_hot_matrices=16)

model = Sequential()
model.add(Flatten(input_shape=(1, 4 + 4 * 4, 16,) + (4, 4)))
model.add(Dense(units=1024, activation='relu'))
model.add(Dense(units=512, activation='relu'))
model.add(Dense(units=256, activation='relu'))
model.add(Dense(units=4, activation='linear'))

memory = SequentialMemory(limit=6000, window_length=1)
TRAIN_POLICY = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=0.05,
                                    value_min=0.05, value_test=0.01, nb_steps=100000)
TEST_POLICY = EpsGreedyQPolicy(eps=.01)

dqn = DQNAgent(model=model, nb_actions=4, test_policy=TEST_POLICY, policy=TRAIN_POLICY,
               memory=memory, processor=processor, nb_steps_warmup=5000, gamma=.99,
               target_model_update=1000, train_interval=4, delta_clip=1.)
dqn.compile(Adam(lr=.00025), metrics=['mse'])

weights_filepath = data_filepath + 'train/weights_steps_' + str(NB_STEPS_TRAINING) + '.h5f'
dqn.load_weights(weights_filepath)

with open(csv_filepath, 'w', newline='') as file:
    writer = csv.writer(file, quoting=csv.QUOTE_NONNUMERIC, delimiter=';')
    writer.writerow(['episode', 'episode_steps', 'highest_score', 'max_tile'])

_callbacks = [TestCall2048(csv_filepath)]
dqn.test(env, nb_episodes=500, visualize=False, verbose=1, callbacks=_callbacks)
policy = MaxBoltzmannQPolicy()
dqn = DQNAgent(model=model, nb_actions=nb_neuron_output, memory=memory,
               nb_steps_warmup=10, target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae', 'accuracy'])

metrics = Metrics(dqn, env)

# fileName = '1D_advanced_Sequential1000_BoltzmannQ_10000steps(7)'
# fileName = '1D_advanced_Sequential1000_EpsGreedyQ_10000steps(7)'
# fileName = '1D_advanced_Sequential1000_MaxBoltzmannQ_10000steps(7)'
# fileName = '1D_advanced_Sequential50000_BoltzmannQPolicy_10000steps(7)'
# fileName = '1D_advanced_Sequential50000_MaxBoltzmannQ_1000000steps(0)'
fileName = '1D__Sequential50000_BoltzmannQ_1000000steps(0)'

dqn.load_weights('./output/' + fileName + '.h5f')
dqn.test(env, nb_episodes=1, visualize=False, callbacks=[metrics])
metrics.export_figs(fileName)
cumulated_reward = metrics.cumulated_reward()

import matplotlib.pyplot as plt
plt.figure()
plt.plot(cumulated_reward, alpha=.6)
plt.title('cumulated_reward for ' + fileName)
plt.ylabel('cumulated_reward')
plt.xlabel('steps')
plt.savefig('./metrics/' + fileName + '_cumulated_reward.png')
model.add(Dense(3, input_dim=1, activation='tanh'))
model.add(Dense(nb_actions))
model.add(Activation('sigmoid'))

# Note: value_min=-1 drives eps negative, so the policy becomes purely greedy once the
# annealing schedule passes zero.
policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1.,
                              value_min=-1, value_test=.05, nb_steps=1000000)
memory = SequentialMemory(limit=10000000, window_length=1)
dqn2 = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=50,
                target_model_update=1e-2, policy=policy,
                enable_double_dqn=True, enable_dueling_network=False)
dqn2.compile(Adam(lr=1e-3), metrics=['mae', 'acc'])

import os.path
file_path = 'Double_DQN_Taxi.h5f'
if os.path.exists(file_path):
    dqn2.load_weights(file_path)

class Saver(Callback):
    def on_episode_end(self, episode, logs={}):
        print('episode callback')
        if episode % 1 == 0:
            self.model.save_weights('Double_DQN_Taxi.h5f', overwrite=True)

logs = FileLogger('Double_DQN_Taxi.csv', interval=1)
s = Saver()
dqn2.fit(env, nb_steps=2e8, callbacks=[s, logs], visualize=False, verbose=2)
# dqn2.test(env, nb_episodes=10, visualize=True)
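# If the intent was a standard epsilon-greedy schedule, a more conventional setup keeps
# eps within [0, 1]; a sketch (the values are common Atari-style defaults, an assumption
# here, not taken from the original code):
policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps',
                              value_max=1.0, value_min=0.1,
                              value_test=0.05, nb_steps=1000000)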
class Agent(object):
    name = 'DQN'

    def __init__(self, number_of_training_steps=1e5, gamma=0.999, load_weights=False,
                 visualize=False, dueling_network=True, double_dqn=True, nn_type='mlp',
                 **kwargs):
        """
        Agent constructor

        :param window_size: int, number of lags to include in observation
        :param max_position: int, maximum number of positions able to be held in inventory
        :param fitting_file: str, file used for z-score fitting
        :param testing_file: str, file used for dqn experiment
        :param env: environment name
        :param seed: int, random seed number
        :param action_repeats: int, number of steps to take in environment between actions
        :param number_of_training_steps: int, number of steps to train agent for
        :param gamma: float, value between 0 and 1 used to discount future DQN returns
        :param format_3d: boolean, format observation as matrix or tensor
        :param train: boolean, train or test agent
        :param load_weights: boolean, import existing weights
        :param z_score: boolean, standardize observation space
        :param visualize: boolean, visualize environment
        :param dueling_network: boolean, use dueling network architecture
        :param double_dqn: boolean, use double DQN for Q-value approximation
        """
        # Agent arguments
        # self.env_name = id
        self.neural_network_type = nn_type
        self.load_weights = load_weights
        self.number_of_training_steps = number_of_training_steps
        self.visualize = visualize

        # Create environment
        self.env = gym.make(**kwargs)
        self.env_name = self.env.env.id

        # Create agent
        # NOTE: 'Keras-RL' uses its own frame-stacker
        self.memory_frame_stack = 1  # Number of frames to stack e.g., 1.
        self.model = self.create_model(name=self.neural_network_type)
        self.memory = SequentialMemory(limit=10000,
                                       window_length=self.memory_frame_stack)
        self.train = self.env.env.training
        self.cwd = os.path.dirname(os.path.realpath(__file__))

        # create the agent
        self.agent = DQNAgent(model=self.model,
                              nb_actions=self.env.action_space.n,
                              memory=self.memory,
                              processor=None,
                              nb_steps_warmup=500,
                              enable_dueling_network=dueling_network,
                              dueling_type='avg',
                              enable_double_dqn=double_dqn,
                              gamma=gamma,
                              target_model_update=1000,
                              delta_clip=1.0)
        self.agent.compile(Adam(lr=float("3e-4")), metrics=['mae'])

    def __str__(self):
        # msg = '\n'
        # return msg.join(['{}={}'.format(k, v) for k, v in self.__dict__.items()])
        return 'Agent = {} | env = {} | number_of_training_steps = {}'.format(
            Agent.name, self.env_name, self.number_of_training_steps)

    def create_model(self, name: str = 'cnn') -> Sequential:
        """
        Helper function to create and return the default MLP or CNN model.

        :param name: Neural network type ['mlp' or 'cnn']
        :return: neural network
        """
        LOGGER.info("creating model for {}".format(name))
        if name == 'cnn':
            return self._create_cnn_model()
        elif name == 'mlp':
            return self._create_mlp_model()

    def _create_cnn_model(self) -> Sequential:
        """
        Create a convolutional neural network with a dense layer at the end.
        :return: keras model
        """
        features_shape = (self.memory_frame_stack, *self.env.observation_space.shape)
        model = Sequential()
        conv = Conv2D
        model.add(conv(input_shape=features_shape, filters=5, kernel_size=[10, 1],
                       padding='same', activation='relu', strides=[5, 1],
                       data_format='channels_first'))
        model.add(conv(filters=5, kernel_size=[5, 1], padding='same', activation='relu',
                       strides=[2, 1], data_format='channels_first'))
        model.add(conv(filters=5, kernel_size=[4, 1], padding='same', activation='relu',
                       strides=[2, 1], data_format='channels_first'))
        model.add(Flatten())
        model.add(Dense(256, activation='relu'))
        # Note: a softmax output is unusual for Q-value heads (linear is typical).
        model.add(Dense(self.env.action_space.n, activation='softmax'))
        LOGGER.info(model.summary())
        return model

    def _create_mlp_model(self) -> Sequential:
        """
        Create a dense neural network with a dense layer at the end.

        :return: keras model
        """
        features_shape = (self.memory_frame_stack, *self.env.observation_space.shape)
        model = Sequential()
        model.add(Dense(units=256, input_shape=features_shape, activation='relu'))
        model.add(Dense(units=256, activation='relu'))
        model.add(Flatten())
        model.add(Dense(self.env.action_space.n, activation='softmax'))
        LOGGER.info(model.summary())
        return model

    def start(self) -> None:
        """
        Entry point for agent training and testing

        :return: (void)
        """
        output_directory = os.path.join(self.cwd, 'dqn_weights')
        if not os.path.exists(output_directory):
            LOGGER.info('{} does not exist. Creating Directory.'.format(output_directory))
            os.mkdir(output_directory)

        weight_name = 'dqn_{}_{}_weights.h5f'.format(self.env_name,
                                                     self.neural_network_type)
        weights_filename = os.path.join(output_directory, weight_name)
        LOGGER.info("weights_filename: {}".format(weights_filename))

        if self.load_weights:
            LOGGER.info('...loading weights for {} from\n{}'.format(
                self.env_name, weights_filename))
            self.agent.load_weights(weights_filename)

        if self.train:
            step_chkpt = '{step}.h5f'
            step_chkpt = 'dqn_{}_weights_{}'.format(self.env_name, step_chkpt)
            checkpoint_weights_filename = os.path.join(self.cwd, 'dqn_weights', step_chkpt)
            LOGGER.info("checkpoint_weights_filename: {}".format(checkpoint_weights_filename))
            log_filename = os.path.join(self.cwd, 'dqn_weights',
                                        'dqn_{}_log.json'.format(self.env_name))
            LOGGER.info('log_filename: {}'.format(log_filename))

            callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename,
                                                 interval=250000)]
            callbacks += [FileLogger(log_filename, interval=100)]

            LOGGER.info('Starting training...')
            self.agent.fit(self.env,
                           callbacks=callbacks,
                           nb_steps=self.number_of_training_steps,
                           log_interval=10000,
                           verbose=0,
                           visualize=self.visualize)
            LOGGER.info("training over.")
            LOGGER.info('Saving AGENT weights...')
            self.agent.save_weights(weights_filename, overwrite=True)
            LOGGER.info("AGENT weights saved.")
        else:
            LOGGER.info('Starting TEST...')
            self.agent.test(self.env, nb_episodes=2, visualize=self.visualize)
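# A usage sketch for the Agent wrapper above; the environment id and the kwargs forwarded
# to gym.make() are placeholders, not names from the original repo.
agent = Agent(id='trading-v0', number_of_training_steps=100000,
              nn_type='mlp', load_weights=False)
print(agent)   # Agent = DQN | env = trading-v0 | number_of_training_steps = 100000
agent.start()  # trains when env.training is True, otherwise runs a 2-episode test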
# After training is done, we save the final weights one more time.
dqn.save_weights(weights_filename, overwrite=False)

# Finally, evaluate our algorithm for 10 episodes.
dqn.test(env, nb_episodes=10, nb_max_start_steps=30, action_repetition=1,
         start_step_policy=start_step_policy, visualize=True)
elif args.mode == 'test':
    weights_filename = 'dqn_{}_weights.h5f'.format(args.env_name)
    if args.weights:
        weights_filename = args.weights
    dqn.load_weights(weights_filename)
    dqn.test(env, nb_episodes=10, nb_max_start_steps=30, action_repetition=1,
             nb_max_episode_steps=1800, start_step_policy=start_step_policy,
             visualize=True, starting_checkpoints=[i for i in range(17)])
elif args.mode == 'batch_test':
    # Test a batch of methods with their corresponding weights and write the output to a log.
    # This mode expects a directory structure consisting of a folder with the different
    # methods as subdirectories; the optional --methods parameter takes the name of that
    # folder (default: 'methods'). Each method folder should contain the weights to be
    # tested. Method folders and weights starting with '__' are omitted.
# is Boltzmann-style exploration:
# policy = BoltzmannQPolicy(tau=1.)
# Feel free to give it a try!

# Note: window_length, delta_range, and reward_range reflect an older keras-rl API;
# newer versions take window_length on the memory and delta_clip on the agent.
dqn = DQNAgent(model=model, nb_actions=nb_actions, policy=policy,
               window_length=WINDOW_LENGTH, memory=memory, processor=processor,
               nb_steps_warmup=50000, gamma=.99, delta_range=(-1., 1.),
               reward_range=(-1., 1.), target_model_update=10000, train_interval=4)
dqn.compile(Adam(lr=.00025), metrics=['mae'])

if args.mode == 'train':
    # Okay, now it's time to learn something! We capture the interrupt exception so that
    # training can be prematurely aborted. Notice that you can use the built-in Keras callbacks!
    weights_filename = 'dqn_{}_weights.h5f'.format(args.env_name)
    checkpoint_weights_filename = 'dqn_' + args.env_name + '_weights_{step}.h5f'
    log_filename = 'dqn_{}_log.json'.format(args.env_name)
    callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000)]
    callbacks += [FileLogger(log_filename, interval=100)]
    dqn.fit(env, callbacks=callbacks, nb_steps=1750000, log_interval=10000)

    # After training is done, we save the final weights one more time.
    dqn.save_weights(weights_filename, overwrite=True)

    # Finally, evaluate our algorithm for 10 episodes.
    dqn.test(env, nb_episodes=10, visualize=False)
elif args.mode == 'test':
    weights_filename = 'dqn_{}_weights.h5f'.format(args.env_name)
    if args.weights:
        weights_filename = args.weights
    dqn.load_weights(weights_filename)
    dqn.test(env, nb_episodes=10, visualize=True)
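# For comparison, a sketch of the same agent against the newer keras-rl API, where
# window_length lives on the memory and delta_range collapses to delta_clip (this mirrors
# the current dqn_atari example; treat it as an approximation of this snippet's intent):
memory = SequentialMemory(limit=1000000, window_length=WINDOW_LENGTH)
dqn = DQNAgent(model=model, nb_actions=nb_actions, policy=policy, memory=memory,
               processor=processor, nb_steps_warmup=50000, gamma=.99,
               target_model_update=10000, train_interval=4,
               delta_clip=1.)  # replaces delta_range=(-1., 1.)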
                 enable_dueling_network=True, dueling_type='avg',
                 target_model_update=1e-2, policy=train_policy,
                 test_policy=test_policy)
    agent.compile(Adam(lr=1e-3, clipnorm=1.0), metrics=['mae'])
elif METHOD.upper() == 'DQN':
    memory = SequentialMemory(limit=NUM_STEPS, window_length=1)
    agent = DQNAgent(model=model, nb_actions=nb_actions, memory=memory,
                     nb_steps_warmup=100, target_model_update=1e-2,
                     policy=train_policy, test_policy=test_policy)
    agent.compile(Adam(lr=1e-3, clipnorm=1.0), metrics=['mae'])
elif METHOD.upper() == 'SARSA':
    # SARSA does not require a memory.
    agent = SarsaAgent(model=model, nb_actions=nb_actions, nb_steps_warmup=10,
                       policy=train_policy)
    agent.compile(Adam(lr=1e-3, clipnorm=1.0), metrics=['mae'])
elif METHOD.upper() == 'CEM':
    memory = EpisodeParameterMemory(limit=1000, window_length=1)
    agent = CEMAgent(model=model, nb_actions=nb_actions, memory=memory, batch_size=50,
                     nb_steps_warmup=2000, train_interval=50, elite_frac=0.05)
    agent.compile()
else:
    raise ValueError('Please select DQN, DUEL_DQN, SARSA, or CEM for your method type.')

# Load the model weights
agent.load_weights(WEIGHT_FILENAME)

# Finally, evaluate our algorithm for 5 episodes.
agent.test(env, nb_episodes=5, visualize=True, action_repetition=5)  # , nb_max_episode_steps=500)