#### GCN Policy
# Roll out one episode from a fresh copy of the initial state, sampling every
# action from the network's softmax output, then draw the resulting graph.
state = dc(init_state)
if cuda_flag:
    state.g.ndata['x'] = state.g.ndata['x'].cuda()
# Reset the episode flag explicitly (the heuristic section below does the
# same); otherwise a stale value left by earlier code could skip the rollout.
done = False
sum_r = 0
T1 = time.time()
[idx1, idx2] = mvc.get_ilegal_actions(state)
while not done:
    G = state.g
    [pi, val] = NN(G)
    pi = pi.squeeze()
    # Entries selected by idx1 get -inf so softmax assigns them probability 0
    # (presumably idx1 marks the currently illegal actions -- confirm against
    # mvc.get_ilegal_actions).
    pi[idx1] = -float('Inf')
    pi = F.softmax(pi, dim=0)
    dist = torch.distributions.categorical.Categorical(pi)
    action = dist.sample()
    new_state, reward, done = mvc.step(state, action)
    # Refresh the mask for the state the next iteration will act on.
    [idx1, idx2] = mvc.get_ilegal_actions(new_state)
    state = new_state
    sum_r += reward
T2 = time.time()

# Colour each node by the first column of its 'x' feature in the final state.
node_tag = state.g.ndata['x'][:, 0].cpu().squeeze().numpy().tolist()
nx.draw(state.g.to_networkx(), pos, node_color=node_tag, with_labels=True)
plt.show()

### Heuristic Policy
# Start a second episode from the same initial state; the rest of this section
# continues beyond the lines shown here.
state = dc(init_state)
done = False
sum_r2 = 0
T1 = time.time()
[idx1, idx2] = mvc.get_ilegal_actions(state)