def validate(vali_set, model):
    N_JOBS = vali_set[0][0].shape[0]
    N_MACHINES = vali_set[0][0].shape[1]

    from JSSP_Env import SJSSP
    from mb_agg import g_pool_cal
    from agent_utils import sample_select_action
    from agent_utils import greedy_select_action
    import numpy as np
    import torch
    from Params import configs

    env = SJSSP(n_j=N_JOBS, n_m=N_MACHINES)
    device = torch.device(configs.device)
    g_pool_step = g_pool_cal(graph_pool_type=configs.graph_pool_type,
                             batch_size=torch.Size([1, env.number_of_tasks, env.number_of_tasks]),
                             n_nodes=env.number_of_tasks,
                             device=device)
    make_spans = []
    # rollout using model
    for data in vali_set:
        adj, fea, candidate, mask = env.reset(data)
        rewards = -env.initQuality
        while True:
            fea_tensor = torch.from_numpy(np.copy(fea)).to(device)
            adj_tensor = torch.from_numpy(np.copy(adj)).to(device).to_sparse()
            candidate_tensor = torch.from_numpy(np.copy(candidate)).to(device)
            mask_tensor = torch.from_numpy(np.copy(mask)).to(device)
            with torch.no_grad():
                pi, _ = model(x=fea_tensor,
                              graph_pool=g_pool_step,
                              padded_nei=None,
                              adj=adj_tensor,
                              candidate=candidate_tensor.unsqueeze(0),
                              mask=mask_tensor.unsqueeze(0))
            # action = sample_select_action(pi, candidate)
            action = greedy_select_action(pi, candidate)
            adj, fea, reward, done, candidate, mask = env.step(action.item())
            rewards += reward
            if done:
                break
        make_spans.append(rewards - env.posRewards)
        # print(rewards - env.posRewards)
    return np.array(make_spans)
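
# Usage sketch (not from the original file): one way validate() might be called on a
# freshly generated validation set. Assumptions: uni_instance_gen lives in a module
# named uniform_instance_gen, and `model` is a trained policy network with the call
# signature used above. Since validate() accumulates `rewards - env.posRewards`
# (the negated makespan), the returned array is flipped back before averaging.
if __name__ == '__main__':
    import numpy as np
    from uniform_instance_gen import uni_instance_gen  # module path assumed

    np.random.seed(200)
    vali_set = [uni_instance_gen(n_j=6, n_m=6, low=1, high=99) for _ in range(10)]
    # `model` would be a trained policy, e.g. ppo.policy restored from a checkpoint:
    # makespans = -validate(vali_set, model)
    # print('mean validation makespan:', makespans.mean())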
import time
import numpy as np
from JSSP_Env import SJSSP
from uniform_instance_gen import uni_instance_gen  # module path assumed

# problem size and processing-time range; values assumed to match the 3x3 example below
n_j, n_m, low, high = 3, 3, 1, 99

SEED = 11
np.random.seed(SEED)

env = SJSSP(n_j=n_j, n_m=n_m)

# roll out the env with random actions
t1 = time.time()
data = uni_instance_gen(n_j=n_j, n_m=n_m, low=low, high=high)
dur = np.array([[83, 65, 3],
                [69, 42, 64],
                [27, 27, 18]])
mch = np.array([[3, 2, 1],
                [1, 2, 3],
                [2, 1, 3]])
# data = (dur, mch)  # uncomment to use the fixed 3x3 instance above
print('Dur')
print(data[0])
print('Mach')
print(data[-1])
print()

_, _, omega, mask = env.reset(data)
# print('Init end time')
# print(env.LBs)
# print()
rewards = [-env.initQuality]
while True:
    # pick a random eligible operation from the unmasked candidate set
    action = np.random.choice(omega[~mask])
    # print('action:', action)
    adj, _, reward, done, omega, mask = env.step(action)
    rewards.append(reward)
    # print('ET after action:\n', env.LBs)
    # print(fea)
    # print()
    if env.done():
        break
t2 = time.time()
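
# Sanity-check sketch (not in the original): recover the makespan from the reward
# trace. Assumes, as in validate() above, that the cumulative reward minus
# env.posRewards equals the negated makespan.
makespan = -(sum(rewards) - env.posRewards)
print('random-rollout makespan:', makespan)
print('rollout wall time:', t2 - t1, 's')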
g_pool_step = g_pool_cal(graph_pool_type=configs.graph_pool_type,
                         batch_size=torch.Size([1, env.number_of_tasks, env.number_of_tasks]),
                         n_nodes=env.number_of_tasks,
                         device=device)

dataLoaded = np.load('./BenchDataNmpy/' + benchmark + str(N_JOBS_P) + 'x' + str(N_MACHINES_P) + '.npy')
dataset = []
for i in range(dataLoaded.shape[0]):
    dataset.append((dataLoaded[i][0], dataLoaded[i][1]))

result = []
t1 = time.time()
for i, data in enumerate(dataset):
    adj, fea, candidate, mask = env.reset(data)
    ep_reward = -env.max_endTime
    while True:
        # Running policy_old:
        fea_tensor = torch.from_numpy(np.copy(fea)).to(device)
        adj_tensor = torch.from_numpy(np.copy(adj)).to(device).to_sparse()
        candidate_tensor = torch.from_numpy(np.copy(candidate)).to(device)
        mask_tensor = torch.from_numpy(np.copy(mask)).to(device)
        with torch.no_grad():
            pi, _ = ppo.policy(x=fea_tensor,
                               graph_pool=g_pool_step,
                               padded_nei=None,
                               adj=adj_tensor,
                               candidate=candidate_tensor.unsqueeze(0),
                               mask=mask_tensor.unsqueeze(0))
        # greedy rollout, as in validate() above
        action = greedy_select_action(pi, candidate)
        adj, fea, reward, done, candidate, mask = env.step(action.item())
        ep_reward += reward
        if done:
            break
    result.append(-ep_reward + env.posRewards)
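
# Summary sketch (not in the original): average makespan over the benchmark set and
# the total rollout time, assuming `result` holds per-instance makespans as appended
# above.
t2 = time.time()
print('average makespan on ' + benchmark + str(N_JOBS_P) + 'x' + str(N_MACHINES_P) + ':', np.mean(result))
print('total rollout time:', t2 - t1, 's')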