head_datetime = start_time - time_step * backoff_epochs tail_datetime = head_datetime + total_time TOTAL_EPOCHS = int(total_time / time_step) # |- Reward rewarding = {'serve': Rs, 'wait': Rw, 'fail': Rf} te = TrafficEmulator(session_df=session_df, time_step=time_step, head_datetime=head_datetime, tail_datetime=tail_datetime, rewarding=rewarding, verbose=2) ts = TrafficServer(cost=(Co, Cw), verbose=2) env_model = SJTUModel(traffic_params, queue_params, reward_params, 2) agent = Dyna_QAgentNN( env_model=env_model, num_sim=num_sim, # agent = Phi_QAgentNN( # phi_length=phi_length, dim_state=dim_state, range_state=range_state, f_build_net=None, batch_size=batch_size, learning_rate=learning_rate, momentum=momentum, reward_scaling=reward_scaling, reward_scaling_update=reward_scaling_update, rs_period=rs_period,
num_sim = 10  # simulated (model-based) updates per real interaction step in Dyna-Q

# Build entities
# Per-event reward weights consumed by the emulator (keys are part of its API).
rewarding = {'serve': Rs, 'wait': Rw, 'fail': Rf}
te = TrafficEmulator(session_df=session_df, time_step=time_step,
                     rewarding=rewarding, verbose=1)
ts = TrafficServer(verbose=2, cost=(Co, Cs))
# Parameter tuples are consumed positionally by SJTUModel — keep the ordering intact.
traffic_params = (model_type, traffic_window_size, stride, n_iter, adjust_offset,
                  eval_period, eval_len, n_belief_bins)
queue_params = (max_queue_len, )
reward_params = (Rs, Rw, Rf, Co, Cs, None)
env_model = SJTUModel(traffic_params, queue_params, reward_params, verbose=1)
agent = DynaQAgent(env_model=env_model, num_sim=num_sim,
                   actions=actions, alpha=0.5, gamma=0.5,
                   explore_strategy='epsilon', epsilon=0.1,
                   verbose=2)
# Alternative neural-network agent, kept commented out for reference:
# agent = QAgentNN(dim_state=(1, 1, 3), range_state=((((0, 10), (0, 10), (0, 10),),),),
#                  learning_rate=0.01, reward_scaling=10, batch_size=100,
#                  freeze_period=50, memory_size=200, num_buffer=2,
#                  actions=actions, alpha=0.5, gamma=0.5, explore_strategy='epsilon', epsilon=0.1,
#                  verbose=2
#                  )
c = QController(agent=agent)