示例#1
0
def test_shifts_caused():
    env = RoRoDeck()
    env.reset()

    current_lane = env.current_Lane
    env.step(0)
    env.current_Lane = current_lane
    assert env._get_number_of_shifts(1) == 1
    assert env._get_number_of_shifts(0) == 0
示例#2
0
def test_is_action_legal():
    env = RoRoDeck()
    env.reset()
    number_vehicle = env.vehicle_data[1][0]

    assert env._is_action_legal(0)
    for i in range(number_vehicle):
        assert env._is_action_legal(0)
        env.step(0)
    assert not env._is_action_legal(0)
示例#3
0
def test_reset_method():
    env = RoRoDeck()
    vehicle_data = env.vehicle_data.copy()
    end_of_lanes = env.end_of_lanes.copy()
    grid = env.grid.copy()
    grid_destination = env.grid_destination.copy()
    grid_vehicle_type = env.grid_vehicle_type.copy()
    capacity = env.total_capacity.copy()
    vehicle_counter = env.vehicle_Counter.copy()
    mandatory_cargo_mask = env.mandatory_cargo_mask.copy()
    loaded_vehicles = env.loaded_vehicles.copy()
    reward_system = env.reward_system.copy()
    sequence_no = env.sequence_no
    current_lane = env.current_Lane.copy()
    action_space = env.action_space.copy()
    possible_actions = env.possible_actions.copy()
    number_of_vehicles_loaded = env.number_of_vehicles_loaded.copy()

    env.reset()
    env.step(env.action_space_sample())
    env.step(env.action_space_sample())
    env.reset()

    _vehicleData = env.vehicle_data
    _endOFLanes = env.end_of_lanes
    _grid = env.grid
    _gridDestination = env.grid_destination
    _gridVehicleType = env.grid_vehicle_type
    _capacity = env.total_capacity
    _vehicleCounter = env.vehicle_Counter
    _mandatoryCargoMask = env.mandatory_cargo_mask
    _loadedVehicles = env.loaded_vehicles
    _rewardSystem = env.reward_system
    _sequence_no = env.sequence_no
    _currentLane = env.current_Lane
    _actionSpace = env.action_space
    _possibleActions = env.possible_actions
    _numberOfVehiclesLoaded = env.number_of_vehicles_loaded

    assert (_vehicleData == vehicle_data).all()
    assert (_endOFLanes == end_of_lanes).all()
    assert (_grid == grid).all()
    assert (_gridDestination == grid_destination).all()
    assert (_gridVehicleType == grid_vehicle_type).all()
    assert (_capacity == capacity).all()
    assert (_vehicleCounter == vehicle_counter).all()
    assert (_mandatoryCargoMask == mandatory_cargo_mask).all()
    assert (_loadedVehicles == loaded_vehicles).all()
    assert (_rewardSystem == reward_system).all()
    assert _sequence_no == sequence_no
    assert (_currentLane == current_lane).all()
    assert (_actionSpace == action_space).all()
    assert (_possibleActions == possible_actions).all()
    assert (_numberOfVehiclesLoaded == number_of_vehicles_loaded).all()
示例#4
0
def test_get_current_state():
    env = RoRoDeck()

    state = env.current_state
    assert np.shape(state) == (25,)
    env.reset()
    state = env.current_state
    assert np.shape(state) == (25,)

    env.step(env.action_space_sample())
    assert not np.all(state == env.current_state)
示例#5
0
def test_end_of_lane_method():
    env = RoRoDeck()
    initial_end_of_lanes = env.initial_end_of_lanes
    env.reset()
    end_of_lanes = env.end_of_lanes.copy()
    assert np.all(end_of_lanes == initial_end_of_lanes)

    current_lane = env.current_Lane
    length = env.vehicle_data[4][env.possible_actions[0]]

    env.step(env.possible_actions[0])

    assert env.end_of_lanes[current_lane] == end_of_lanes[current_lane] + length
示例#6
0
def test_possible_actions():
    env = RoRoDeck()
    env.vehicle_data[5][4] = 0  # disable reefer for test
    env.reset()
    number_of_vehicle = env.vehicle_data[1][0]

    assert len(env.vehicle_data.T) == len(env.possible_actions) == 5

    for i in range(number_of_vehicle):
        assert len(env.vehicle_data.T) == len(env.possible_actions) == 5
        env.step(0)

    assert len(env.possible_actions) == len(env.vehicle_data.T) - 1
示例#7
0
def test_is_terminal_state():
    env = RoRoDeck()
    assert not env._is_terminal_state()

    env.reset()
    assert not env._is_terminal_state()

    done = False
    for i in range(4):
        state, reward, done, info = env.step(env.action_space_sample())
    assert not env._is_terminal_state()

    while not done:
        state, reward, done, info = env.step(env.action_space_sample())
    assert env._is_terminal_state()
示例#8
0
def test_dqn_agent():
    env = RoRoDeck(lanes=8, rows=12)
    n_games = 2
    agent = DQLearningAgent(env=env,
                            module_path=None,
                            gamma=0.999,
                            epsilon=1.0,
                            alpha=0.0005,
                            mem_size=10000000,
                            batch_size=64,
                            epsilon_min=0.01,
                            epsilon_dec=0.99999)

    actions_taken = 0
    for i in range(n_games):
        done = False
        score = 0
        observation = env.reset()
        while not done:
            action = env.action_space_sample()
            state_actions = env.possible_actions
            observation_, reward, done, info = env.step(action)
            actions_taken += 1
            new_state_actions = env.possible_actions
            score += reward
            agent.memory.store_transition(observation, action, reward,
                                          observation_, done)
            observation = observation_
            agent.learn()

    assert np.shape(np.nonzero(agent.memory.reward_memory))[1] == actions_taken
    assert np.shape(np.nonzero(agent.memory.terminal_memory))[1] == 2
示例#9
0
def test_stepMethod():
    env = RoRoDeck()
    env.reset()

    vehicle_data = env.vehicle_data.copy()
    end_of_lanes = env.end_of_lanes.copy()
    grid = env.grid.copy()
    grid_destination = env.grid_destination.copy()
    vehicle_counter = env.vehicle_Counter.copy()
    mandatory_cargo_mask = env.mandatory_cargo_mask.copy()
    loaded_vehicles = env.loaded_vehicles.copy()
    reward_system = env.reward_system.copy()
    sequence_no = env.sequence_no
    current_lane = env.current_Lane.copy()
    action_space = env.action_space.copy()
    possible_actions = env.possible_actions.copy()

    np.random.seed(0)

    action = action_space[mandatory_cargo_mask][0]
    if action not in possible_actions:
        action = env.action_space_sample()

    env.step(action)

    destination = vehicle_data[3][action]
    length = vehicle_data[4][action]

    for i in range(length):
        grid.T[current_lane][end_of_lanes[current_lane] + i] = sequence_no
        grid_destination.T[current_lane][end_of_lanes[current_lane] + i] = destination

    loaded_vehicles[current_lane][vehicle_counter[current_lane]] = action

    assert (env.grid == grid).all()
    assert (env.grid_destination == grid_destination).all()
    assert env.end_of_lanes[current_lane] == end_of_lanes[current_lane] + length
    assert env.sequence_no == sequence_no + 1
    assert (env.loaded_vehicles == loaded_vehicles).all()
    assert (env.vehicle_Counter[current_lane] == vehicle_counter[current_lane] + 1)

    _vehicle_data = env.vehicle_data
    _reward_system = env.reward_system

    assert (_vehicle_data == vehicle_data).all()
    assert (_reward_system == reward_system).all()
示例#10
0
def test_roro_deck():
    env = RoRoDeck()
    env.reset()
    done = False
    i = 0

    while not done:
        observation_, reward, done, info = env.step(env.action_space_sample())
        i += 1
        assert i <= 100
示例#11
0
def test_shifts():
    random_actions_1 = [
        0, 3, 3, 0, 3, 0, 2, 1, 0, 4, 0, 3, 4, 4, 1, 0, 2, 4, 4, 2, 4, 3, 2, 3,
        0, 0, 0, 3, 4, 4, 2, 2, 4, 1, 3, 3, 3, 1, 2, 2, 3, 2, 3, 4, 4, 2, 1, 2,
        0, 3, 0, 2, 4, 3, 2, 1, 2, 1, 4, 1, 0, 0, 0, 0, 0, 1, 0, 3, 1, 4, 1, 2,
        4, 0, 0, 2, 0, 4, 4, 4, 1, 1, 4, 1, 1, 3, 1, 3, 3, 1, 1, 1, 0, 3, 0, 4,
        1, 4, 0, 3
    ]

    random_actions_2 = [
        0, 3, 3, 0, 3, 0, 2, 1, 0, 4, 0, 3, 4, 4, 1, 0, 2, 4, 4, 2, 4, 3, 2, 3,
        0, 0, 0, 3, 4, 4, 2, 2, 4, 1, 3, 3, 3, 1, 2, 2, 3, 2, 3, 4, 4, 2, 1, 2,
        0, 3, 0, 2, 4, 3, 2, 1, 2, 1, 4, 1, 0, 0, 0, 0, 0, 1, 0, 3, 1, 4, 1, 2,
        4, 0, 0, 2, 0, 4, 4, 4, 1, 1, 4, 1, 1, 3, 1, 3, 3, 1, 1, 1, 0, 3, 0, 4,
        1, 4, 0, 3
    ]

    random_actions_3 = [
        0, 3, 3, 0, 3, 0, 2, 1, 0, 4, 0, 3, 4, 4, 1, 0, 2, 4, 4, 2, 4, 3, 2, 3,
        0, 0, 0, 3, 0, 3, 3, 0, 3, 0, 2, 1, 0, 4, 0, 3, 4, 4, 1, 0, 2, 4, 4, 2,
        4, 3, 2, 3, 0, 0, 0, 3, 2, 1, 4, 1, 0, 0, 0, 0, 0, 1, 0, 3, 1, 4, 1, 2,
        4, 0, 0, 2, 0, 4, 4, 4, 1, 1, 4, 1, 4, 4, 2, 2, 4, 1, 3, 3, 3, 1, 2, 2,
        4, 1, 2, 4, 0, 0, 2, 0, 4, 4, 4, 1, 1, 4, 1, 4, 1, 3, 1, 3, 3, 1, 1, 1,
        0, 3, 0, 4, 1, 4, 0, 3, 2, 4
    ]

    i = 0
    env1.reset()
    done_ = False
    while not done_:
        action_ = random_actions_1[i]
        _, _, done_, _ = env1.step(action_)
        i += 1

    i = 0
    env2.reset()
    done_ = False
    while not done_:
        action_ = random_actions_2[i]
        _, _, done_, _ = env2.step(action_)
        i += 1

    env3 = RoRoDeck(lanes=8, rows=20)
    env3.reset()
    i = 0
    done_ = False
    while not done_:
        action_ = random_actions_3[i % len(random_actions_3)]
        _, _, done_, _ = env3.step(action_)
        i += 1
    evaluator1 = Evaluator(env1.vehicle_data, env1.grid)
    shifts1 = evaluator1.evaluate(env1.get_stowage_plan()).shifts

    evaluator2 = Evaluator(env2.vehicle_data, env2.grid)
    shifts2 = evaluator2.evaluate(env2.get_stowage_plan()).shifts

    evaluator3 = Evaluator(env3.vehicle_data, env3.grid)
    shifts3 = evaluator3.evaluate(env3.get_stowage_plan()).shifts

    assert shifts1 == 3
    assert shifts2 == 3
    assert shifts3 == 19
示例#12
0
# Preparation for Tests
np.random.seed(0)

# Create a random stowagePlan
env1 = RoRoDeck()
env2 = RoRoDeck()

env1.reset()
env2.reset()

done = False
total_rewards_env1 = 0

while not done:
    action = env1.action_space_sample()
    observation_, reward, done, info = env1.step(action)
    total_rewards_env1 += reward

done = False
total_rewards_env2 = 0

while not done:
    action = env2.action_space_sample()
    observation_, reward, done, info = env2.step(action)
    total_rewards_env2 += reward


# Test if the Evaluator and the agents estimate are consensually
def test_agent_evaluator_consensus():
    evaluator1 = Evaluator(env1.vehicle_data, env1.grid)
    evaluation1 = evaluator1.evaluate(env1.get_stowage_plan())
示例#13
0
def test_find_current_lane():
    env = RoRoDeck(lanes=8, rows=12)
    env.reset()
    assert env.current_Lane == 3
    env.step(env.action_space_sample())
    assert env.current_Lane == 4