        }
        info = f"TDQ_L{self.env.lanes}-R{self.env.rows}"
        super().save_model(path, info)


# Example of usage
if __name__ == '__main__':
    np.random.seed(0)

    # Set up logging and the RoRo deck environment
    loggingBase = LoggingBase()
    module_path = loggingBase.module_path
    env = RoRoDeck(lanes=10, rows=12)
    number_of_episodes = 5000

    # Train the Q-learning agent and collect training statistics
    agent = TDQLearning(env=env, module_path=module_path, number_of_episodes=number_of_episodes)
    model, total_rewards, steps_to_exit, eps_history, state_expansion = agent.train()

    # Plot training metrics
    plotter = Plotter(module_path, agent.number_of_episodes, show_plot=True)
    plotter.plotRewardPlot(total_rewards)
    plotter.plotStateExp(state_expansion)
    plotter.plotEPSHistory(np.array(eps_history))
    plotter.plot_cargo_units_loaded(np.array(steps_to_exit))

    # Evaluate the final stowage plan
    evaluator = Evaluator(env.vehicle_data, env.grid)
    evaluation = evaluator.evaluate(env.get_stowage_plan())
    print(evaluation)
def test_shifts():
    # Predefined action sequences that reproduce stowage plans with a known number of shifts
    random_actions_1 = [
        0, 3, 3, 0, 3, 0, 2, 1, 0, 4, 0, 3, 4, 4, 1, 0, 2, 4, 4, 2, 4, 3, 2, 3, 0,
        0, 0, 3, 4, 4, 2, 2, 4, 1, 3, 3, 3, 1, 2, 2, 3, 2, 3, 4, 4, 2, 1, 2, 0, 3,
        0, 2, 4, 3, 2, 1, 2, 1, 4, 1, 0, 0, 0, 0, 0, 1, 0, 3, 1, 4, 1, 2, 4, 0, 0,
        2, 0, 4, 4, 4, 1, 1, 4, 1, 1, 3, 1, 3, 3, 1, 1, 1, 0, 3, 0, 4, 1, 4, 0, 3
    ]
    random_actions_2 = [
        0, 3, 3, 0, 3, 0, 2, 1, 0, 4, 0, 3, 4, 4, 1, 0, 2, 4, 4, 2, 4, 3, 2, 3, 0,
        0, 0, 3, 4, 4, 2, 2, 4, 1, 3, 3, 3, 1, 2, 2, 3, 2, 3, 4, 4, 2, 1, 2, 0, 3,
        0, 2, 4, 3, 2, 1, 2, 1, 4, 1, 0, 0, 0, 0, 0, 1, 0, 3, 1, 4, 1, 2, 4, 0, 0,
        2, 0, 4, 4, 4, 1, 1, 4, 1, 1, 3, 1, 3, 3, 1, 1, 1, 0, 3, 0, 4, 1, 4, 0, 3
    ]
    random_actions_3 = [
        0, 3, 3, 0, 3, 0, 2, 1, 0, 4, 0, 3, 4, 4, 1, 0, 2, 4, 4, 2, 4, 3, 2, 3, 0,
        0, 0, 3, 0, 3, 3, 0, 3, 0, 2, 1, 0, 4, 0, 3, 4, 4, 1, 0, 2, 4, 4, 2, 4, 3,
        2, 3, 0, 0, 0, 3, 2, 1, 4, 1, 0, 0, 0, 0, 0, 1, 0, 3, 1, 4, 1, 2, 4, 0, 0,
        2, 0, 4, 4, 4, 1, 1, 4, 1, 4, 4, 2, 2, 4, 1, 3, 3, 3, 1, 2, 2, 4, 1, 2, 4,
        0, 0, 2, 0, 4, 4, 4, 1, 1, 4, 1, 4, 1, 3, 1, 3, 3, 1, 1, 1, 0, 3, 0, 4, 1,
        4, 0, 3, 2, 4
    ]

    # Replay the first action sequence on env1 (defined elsewhere in this test module)
    # until the episode terminates
    i = 0
    env1.reset()
    done_ = False
    while not done_:
        action_ = random_actions_1[i]
        _, _, done_, _ = env1.step(action_)
        i += 1

    # Replay the second action sequence on env2
    i = 0
    env2.reset()
    done_ = False
    while not done_:
        action_ = random_actions_2[i]
        _, _, done_, _ = env2.step(action_)
        i += 1

    # A larger deck: cycle through the third action sequence until the episode terminates
    env3 = RoRoDeck(lanes=8, rows=20)
    env3.reset()
    i = 0
    done_ = False
    while not done_:
        action_ = random_actions_3[i % len(random_actions_3)]
        _, _, done_, _ = env3.step(action_)
        i += 1

    # Count the shifts in each resulting stowage plan
    evaluator1 = Evaluator(env1.vehicle_data, env1.grid)
    shifts1 = evaluator1.evaluate(env1.get_stowage_plan()).shifts
    evaluator2 = Evaluator(env2.vehicle_data, env2.grid)
    shifts2 = evaluator2.evaluate(env2.get_stowage_plan()).shifts
    evaluator3 = Evaluator(env3.vehicle_data, env3.grid)
    shifts3 = evaluator3.evaluate(env3.get_stowage_plan()).shifts

    assert shifts1 == 3
    assert shifts2 == 3
    assert shifts3 == 19