Example #1
def validate(vali_set, model):
    N_JOBS = vali_set[0][0].shape[0]
    N_MACHINES = vali_set[0][0].shape[1]

    from JSSP_Env import SJSSP
    from mb_agg import g_pool_cal
    from agent_utils import sample_select_action
    from agent_utils import greedy_select_action
    import numpy as np
    import torch
    from Params import configs
    env = SJSSP(n_j=N_JOBS, n_m=N_MACHINES)
    device = torch.device(configs.device)
    # pooling operator for a single-instance batch over all tasks of the graph
    g_pool_step = g_pool_cal(
        graph_pool_type=configs.graph_pool_type,
        batch_size=torch.Size([1, env.number_of_tasks, env.number_of_tasks]),
        n_nodes=env.number_of_tasks,
        device=device)
    make_spans = []
    # rollout using model
    for data in vali_set:
        adj, fea, candidate, mask = env.reset(data)
        rewards = -env.initQuality
        while True:
            # convert the current observation to tensors on the evaluation device
            fea_tensor = torch.from_numpy(np.copy(fea)).to(device)
            adj_tensor = torch.from_numpy(np.copy(adj)).to(device).to_sparse()
            candidate_tensor = torch.from_numpy(np.copy(candidate)).to(device)
            mask_tensor = torch.from_numpy(np.copy(mask)).to(device)
            with torch.no_grad():
                pi, _ = model(x=fea_tensor,
                              graph_pool=g_pool_step,
                              padded_nei=None,
                              adj=adj_tensor,
                              candidate=candidate_tensor.unsqueeze(0),
                              mask=mask_tensor.unsqueeze(0))
            # action = sample_select_action(pi, candidate)
            action = greedy_select_action(pi, candidate)  # greedy decoding for validation
            adj, fea, reward, done, candidate, mask = env.step(action.item())
            rewards += reward
            if done:
                break
        make_spans.append(rewards - env.posRewards)  # return minus shaping bonus, i.e. the negative makespan
        # print(rewards - env.posRewards)
    return np.array(make_spans)
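A minimal usage sketch for validate (illustrative only: the 6x6 size, the batch of 100 instances, and the trained model are assumptions; uni_instance_gen is the generator used in Example #2):

# hypothetical driver code, not part of the original example
vali_set = [uni_instance_gen(n_j=6, n_m=6, low=1, high=99) for _ in range(100)]
makespans = validate(vali_set, model)  # model: a trained dispatching policy
print('mean validation quality:', makespans.mean())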
Example #2
import time
import numpy as np
from JSSP_Env import SJSSP
from uniform_instance_gen import uni_instance_gen  # module path assumed; the generator is used below

# illustrative problem parameters (not fixed by the original snippet)
n_j, n_m = 3, 3    # jobs x machines, matching the hand-written instance below
low, high = 1, 99  # processing-time range for the random generator

SEED = 11
np.random.seed(SEED)
env = SJSSP(n_j=n_j, n_m=n_m)

# rollout of the environment with random actions
t1 = time.time()
data = uni_instance_gen(n_j=n_j, n_m=n_m, low=low, high=high)
# optional fixed 3x3 instance; uncomment the assignment below to use it instead of the random one
dur = np.array([[83, 65, 3], [69, 42, 64], [27, 27, 18]])
mch = np.array([[3, 2, 1], [1, 2, 3], [2, 1, 3]])
# data = (dur, mch)
print('Dur')
print(data[0])
print('Mach')
print(data[-1])
print()
_, _, omega, mask = env.reset(data)
# print('Init end time')
# print(env.LBs)
# print()
rewards = [-env.initQuality]
while True:
    # pick a random eligible operation from the current candidate set
    action = np.random.choice(omega[~mask])
    # print('action:', action)
    adj, _, reward, done, omega, mask = env.step(action)
    rewards.append(reward)
    # print('ET after action:\n', env.LBs)
    # print(fea)
    # print()
    if env.done():  # all operations have been scheduled
        break
t2 = time.time()
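After the rollout, the makespan can be recovered from the reward trace in the same way Example #1 does it (the sign convention and the posRewards bookkeeping are assumed from that example):

# assumed bookkeeping, mirroring Example #1
makespan = -(sum(rewards) - env.posRewards)
print('makespan:', makespan, 'rollout time:', t2 - t1)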
Example #3
g_pool_step = g_pool_cal(graph_pool_type=configs.graph_pool_type,
                         batch_size=torch.Size(
                             [1, env.number_of_tasks, env.number_of_tasks]),
                         n_nodes=env.number_of_tasks,
                         device=device)

# load the benchmark instances; the benchmark name and size are set elsewhere in the script
dataLoaded = np.load('./BenchDataNmpy/' + benchmark + str(N_JOBS_P) + 'x' +
                     str(N_MACHINES_P) + '.npy')
dataset = []
for i in range(dataLoaded.shape[0]):
    dataset.append((dataLoaded[i][0], dataLoaded[i][1]))  # (durations, machine order) pair per instance

result = []
t1 = time.time()
for i, data in enumerate(dataset):
    adj, fea, candidate, mask = env.reset(data)
    ep_reward = -env.max_endTime  # start from the negative initial makespan lower bound
    while True:
        # Running policy_old:
        fea_tensor = torch.from_numpy(np.copy(fea)).to(device)
        adj_tensor = torch.from_numpy(np.copy(adj)).to(device).to_sparse()
        candidate_tensor = torch.from_numpy(np.copy(candidate)).to(device)
        mask_tensor = torch.from_numpy(np.copy(mask)).to(device)

        with torch.no_grad():
            pi, _ = ppo.policy(x=fea_tensor,
                               graph_pool=g_pool_step,
                               padded_nei=None,
                               adj=adj_tensor,
                               candidate=candidate_tensor.unsqueeze(0),
                               mask=mask_tensor.unsqueeze(0))
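The fragment stops right after the forward pass. A hedged sketch of how the rollout loop typically continues, mirroring Example #1 (greedy_select_action and the makespan bookkeeping are carried over from that example and are not shown in this fragment):

        # hedged continuation, not part of the original fragment
        action = greedy_select_action(pi, candidate)
        adj, fea, reward, done, candidate, mask = env.step(action.item())
        ep_reward += reward
        if done:
            break
    result.append(-(ep_reward - env.posRewards))  # assumed: negated return gives the makespan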