def test_shifts_caused():
    env = RoRoDeck()
    env.reset()
    current_lane = env.current_Lane
    env.step(0)
    # Restore the lane pointer so that the shift check refers to the lane
    # that was just loaded.
    env.current_Lane = current_lane
    # Loading vehicle type 1 behind the type 0 just placed registers one
    # shift, while another vehicle of type 0 causes none.
    assert env._get_number_of_shifts(1) == 1
    assert env._get_number_of_shifts(0) == 0

def test_is_action_legal():
    env = RoRoDeck()
    env.reset()
    number_vehicle = env.vehicle_data[1][0]
    assert env._is_action_legal(0)
    for i in range(number_vehicle):
        assert env._is_action_legal(0)
        env.step(0)
    assert not env._is_action_legal(0)

def test_reset_method():
    # reset() must restore exactly the same initial state, even after steps
    # have been taken in between.
    env = RoRoDeck()

    vehicle_data = env.vehicle_data.copy()
    end_of_lanes = env.end_of_lanes.copy()
    grid = env.grid.copy()
    grid_destination = env.grid_destination.copy()
    grid_vehicle_type = env.grid_vehicle_type.copy()
    capacity = env.total_capacity.copy()
    vehicle_counter = env.vehicle_Counter.copy()
    mandatory_cargo_mask = env.mandatory_cargo_mask.copy()
    loaded_vehicles = env.loaded_vehicles.copy()
    reward_system = env.reward_system.copy()
    sequence_no = env.sequence_no
    current_lane = env.current_Lane.copy()
    action_space = env.action_space.copy()
    possible_actions = env.possible_actions.copy()
    number_of_vehicles_loaded = env.number_of_vehicles_loaded.copy()

    env.reset()
    env.step(env.action_space_sample())
    env.step(env.action_space_sample())
    env.reset()

    assert (env.vehicle_data == vehicle_data).all()
    assert (env.end_of_lanes == end_of_lanes).all()
    assert (env.grid == grid).all()
    assert (env.grid_destination == grid_destination).all()
    assert (env.grid_vehicle_type == grid_vehicle_type).all()
    assert (env.total_capacity == capacity).all()
    assert (env.vehicle_Counter == vehicle_counter).all()
    assert (env.mandatory_cargo_mask == mandatory_cargo_mask).all()
    assert (env.loaded_vehicles == loaded_vehicles).all()
    assert (env.reward_system == reward_system).all()
    assert env.sequence_no == sequence_no
    assert (env.current_Lane == current_lane).all()
    assert (env.action_space == action_space).all()
    assert (env.possible_actions == possible_actions).all()
    assert (env.number_of_vehicles_loaded == number_of_vehicles_loaded).all()

def test_get_current_state():
    env = RoRoDeck()
    state = env.current_state
    assert np.shape(state) == (25,)

    env.reset()
    state = env.current_state
    assert np.shape(state) == (25,)

    env.step(env.action_space_sample())
    assert not np.all(state == env.current_state)

def test_end_of_lane_method():
    env = RoRoDeck()
    initial_end_of_lanes = env.initial_end_of_lanes
    env.reset()
    end_of_lanes = env.end_of_lanes.copy()
    assert np.all(end_of_lanes == initial_end_of_lanes)

    current_lane = env.current_Lane
    length = env.vehicle_data[4][env.possible_actions[0]]
    env.step(env.possible_actions[0])
    assert env.end_of_lanes[current_lane] == end_of_lanes[current_lane] + length

def test_possible_actions():
    env = RoRoDeck()
    env.vehicle_data[5][4] = 0  # disable reefer for test
    env.reset()
    number_of_vehicle = env.vehicle_data[1][0]

    assert len(env.vehicle_data.T) == len(env.possible_actions) == 5
    for i in range(number_of_vehicle):
        assert len(env.vehicle_data.T) == len(env.possible_actions) == 5
        env.step(0)
    assert len(env.possible_actions) == len(env.vehicle_data.T) - 1

def test_is_terminal_state():
    env = RoRoDeck()
    assert not env._is_terminal_state()
    env.reset()
    assert not env._is_terminal_state()

    done = False
    # The first few random steps cannot finish the episode.
    for i in range(4):
        state, reward, done, info = env.step(env.action_space_sample())
        assert not env._is_terminal_state()
    # Play the episode to the end; only then is the state terminal.
    while not done:
        state, reward, done, info = env.step(env.action_space_sample())
    assert env._is_terminal_state()

def test_dqn_agent():
    env = RoRoDeck(lanes=8, rows=12)
    n_games = 2
    agent = DQLearningAgent(env=env, module_path=None, gamma=0.999, epsilon=1.0,
                            alpha=0.0005, mem_size=10000000, batch_size=64,
                            epsilon_min=0.01, epsilon_dec=0.99999)
    actions_taken = 0

    for i in range(n_games):
        done = False
        score = 0
        observation = env.reset()
        while not done:
            action = env.action_space_sample()
            state_actions = env.possible_actions
            observation_, reward, done, info = env.step(action)
            actions_taken += 1
            new_state_actions = env.possible_actions
            score += reward
            agent.memory.store_transition(observation, action, reward, observation_, done)
            observation = observation_
            agent.learn()

    # Every step stored a transition with a non-zero reward, and each of the
    # two episodes wrote exactly one terminal flag into the replay memory.
    assert np.shape(np.nonzero(agent.memory.reward_memory))[1] == actions_taken
    assert np.shape(np.nonzero(agent.memory.terminal_memory))[1] == 2

def test_step_method():
    env = RoRoDeck()
    env.reset()

    vehicle_data = env.vehicle_data.copy()
    end_of_lanes = env.end_of_lanes.copy()
    grid = env.grid.copy()
    grid_destination = env.grid_destination.copy()
    vehicle_counter = env.vehicle_Counter.copy()
    mandatory_cargo_mask = env.mandatory_cargo_mask.copy()
    loaded_vehicles = env.loaded_vehicles.copy()
    reward_system = env.reward_system.copy()
    sequence_no = env.sequence_no
    current_lane = env.current_Lane.copy()
    action_space = env.action_space.copy()
    possible_actions = env.possible_actions.copy()

    np.random.seed(0)
    action = action_space[mandatory_cargo_mask][0]
    if action not in possible_actions:
        action = env.action_space_sample()
    env.step(action)

    # Replay the expected effect of the step on the local copies.
    destination = vehicle_data[3][action]
    length = vehicle_data[4][action]
    for i in range(length):
        grid.T[current_lane][end_of_lanes[current_lane] + i] = sequence_no
        grid_destination.T[current_lane][end_of_lanes[current_lane] + i] = destination
    loaded_vehicles[current_lane][vehicle_counter[current_lane]] = action

    assert (env.grid == grid).all()
    assert (env.grid_destination == grid_destination).all()
    assert env.end_of_lanes[current_lane] == end_of_lanes[current_lane] + length
    assert env.sequence_no == sequence_no + 1
    assert (env.loaded_vehicles == loaded_vehicles).all()
    assert env.vehicle_Counter[current_lane] == vehicle_counter[current_lane] + 1

    # Static data must not be modified by a step.
    assert (env.vehicle_data == vehicle_data).all()
    assert (env.reward_system == reward_system).all()

def test_roro_deck():
    env = RoRoDeck()
    env.reset()
    done = False
    i = 0
    while not done:
        observation_, reward, done, info = env.step(env.action_space_sample())
        i += 1
    assert i <= 100

def test_shifts():
    random_actions_1 = [0, 3, 3, 0, 3, 0, 2, 1, 0, 4, 0, 3, 4, 4, 1, 0, 2, 4, 4, 2,
                        4, 3, 2, 3, 0, 0, 0, 3, 4, 4, 2, 2, 4, 1, 3, 3, 3, 1, 2, 2,
                        3, 2, 3, 4, 4, 2, 1, 2, 0, 3, 0, 2, 4, 3, 2, 1, 2, 1, 4, 1,
                        0, 0, 0, 0, 0, 1, 0, 3, 1, 4, 1, 2, 4, 0, 0, 2, 0, 4, 4, 4,
                        1, 1, 4, 1, 1, 3, 1, 3, 3, 1, 1, 1, 0, 3, 0, 4, 1, 4, 0, 3]
    random_actions_2 = [0, 3, 3, 0, 3, 0, 2, 1, 0, 4, 0, 3, 4, 4, 1, 0, 2, 4, 4, 2,
                        4, 3, 2, 3, 0, 0, 0, 3, 4, 4, 2, 2, 4, 1, 3, 3, 3, 1, 2, 2,
                        3, 2, 3, 4, 4, 2, 1, 2, 0, 3, 0, 2, 4, 3, 2, 1, 2, 1, 4, 1,
                        0, 0, 0, 0, 0, 1, 0, 3, 1, 4, 1, 2, 4, 0, 0, 2, 0, 4, 4, 4,
                        1, 1, 4, 1, 1, 3, 1, 3, 3, 1, 1, 1, 0, 3, 0, 4, 1, 4, 0, 3]
    random_actions_3 = [0, 3, 3, 0, 3, 0, 2, 1, 0, 4, 0, 3, 4, 4, 1, 0, 2, 4, 4, 2,
                        4, 3, 2, 3, 0, 0, 0, 3, 0, 3, 3, 0, 3, 0, 2, 1, 0, 4, 0, 3,
                        4, 4, 1, 0, 2, 4, 4, 2, 4, 3, 2, 3, 0, 0, 0, 3, 2, 1, 4, 1,
                        0, 0, 0, 0, 0, 1, 0, 3, 1, 4, 1, 2, 4, 0, 0, 2, 0, 4, 4, 4,
                        1, 1, 4, 1, 4, 4, 2, 2, 4, 1, 3, 3, 3, 1, 2, 2, 4, 1, 2, 4,
                        0, 0, 2, 0, 4, 4, 4, 1, 1, 4, 1, 4, 1, 3, 1, 3, 3, 1, 1, 1,
                        0, 3, 0, 4, 1, 4, 0, 3, 2, 4]

    i = 0
    env1.reset()
    done_ = False
    while not done_:
        action_ = random_actions_1[i]
        _, _, done_, _ = env1.step(action_)
        i += 1

    i = 0
    env2.reset()
    done_ = False
    while not done_:
        action_ = random_actions_2[i]
        _, _, done_, _ = env2.step(action_)
        i += 1

    env3 = RoRoDeck(lanes=8, rows=20)
    env3.reset()
    i = 0
    done_ = False
    while not done_:
        action_ = random_actions_3[i % len(random_actions_3)]
        _, _, done_, _ = env3.step(action_)
        i += 1

    evaluator1 = Evaluator(env1.vehicle_data, env1.grid)
    shifts1 = evaluator1.evaluate(env1.get_stowage_plan()).shifts
    evaluator2 = Evaluator(env2.vehicle_data, env2.grid)
    shifts2 = evaluator2.evaluate(env2.get_stowage_plan()).shifts
    evaluator3 = Evaluator(env3.vehicle_data, env3.grid)
    shifts3 = evaluator3.evaluate(env3.get_stowage_plan()).shifts

    assert shifts1 == 3
    assert shifts2 == 3
    assert shifts3 == 19

# Preparation for tests
np.random.seed(0)

# Create two random stowage plans
env1 = RoRoDeck()
env2 = RoRoDeck()
env1.reset()
env2.reset()

done = False
total_rewards_env1 = 0
while not done:
    action = env1.action_space_sample()
    observation_, reward, done, info = env1.step(action)
    total_rewards_env1 += reward

done = False
total_rewards_env2 = 0
while not done:
    action = env2.action_space_sample()
    observation_, reward, done, info = env2.step(action)
    total_rewards_env2 += reward


# Test whether the Evaluator's assessment and the reward collected by the
# agent agree with each other.
def test_agent_evaluator_consensus():
    evaluator1 = Evaluator(env1.vehicle_data, env1.grid)
    evaluation1 = evaluator1.evaluate(env1.get_stowage_plan())
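
# A minimal sketch, assuming only the Evaluator API already exercised in
# test_shifts (Evaluator(vehicle_data, grid), evaluate(get_stowage_plan()),
# and the resulting evaluation's `.shifts` attribute). It illustrates one
# possible consensus criterion: the plan with the higher cumulative reward
# should not need more shifts, since shifts are penalised. The helper is
# deliberately not named `test_*`, so pytest does not collect this
# hypothetical check.
def _consensus_sketch():
    evaluation1 = Evaluator(env1.vehicle_data, env1.grid).evaluate(env1.get_stowage_plan())
    evaluation2 = Evaluator(env2.vehicle_data, env2.grid).evaluate(env2.get_stowage_plan())
    if total_rewards_env1 > total_rewards_env2:
        return evaluation1.shifts <= evaluation2.shifts
    if total_rewards_env2 > total_rewards_env1:
        return evaluation2.shifts <= evaluation1.shifts
    return True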

def test_find_current_lane():
    env = RoRoDeck(lanes=8, rows=12)
    env.reset()
    assert env.current_Lane == 3
    env.step(env.action_space_sample())
    assert env.current_Lane == 4
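
# Optional convenience entry point (an assumption, not required by pytest):
# lets this test module be run directly as a script; normally the suite is
# invoked via `pytest` from the command line.
if __name__ == "__main__":
    import sys

    import pytest

    sys.exit(pytest.main([__file__, "-v"]))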