class Acrobot_GN(Acrobot): ''' ''' STATE_THETA_1, STATE_THETA_2, STATE_V_1, STATE_V_2 = 0, 1, 2, 3 MIN_V_1, MAX_V_1 = -6., 6. MIN_V_2, MAX_V_2 = -6., 6. MIN_TORQUE, MAX_TORQUE = -4., 4. MIN_ANGLE, MAX_ANGLE = -np.pi, np.pi LENGTH = 20. m = 1.0 lc = 0.5 lc2 = 0.25 l2 = 1. I1 = 0.2 I2 = 1.0 g = 9.81 def __init__(self): super(Acrobot, self).__init__() self.G1 = nx.path_graph(2).to_directed() self.node_feat_size = 2 self.edge_feat_size = 3 self.graph_feat_size = 10 self.gn = FFGN(self.graph_feat_size, self.node_feat_size, self.edge_feat_size).cuda() self.gn.load_state_dict(torch.load('model0.05.pth')) normalizers = torch.load('normalized/acrobot0.05.pth') self.in_normalizer = normalizers['in_normalizer'] self.out_normalizer = normalizers['out_normalizer'] def propagate(self, start_state, control, num_steps, integration_step): ''' Integrate system dynamics :param start_state: numpy array with the start state for the integration :param control: numpy array with constant controls to be applied during integration :param num_steps: number of steps to integrate :param integration_step: dt of integration :return: new state of the system ''' action = control delta_state = torch.zeros_like(start_state) last_state = start_state.clone() for i in range(num_steps): init_graph_features(self.G1, self.graph_feat_size, self.node_feat_size, self.edge_feat_size, cuda=True, bs=1) load_graph_features(self.G1, action, last_state.clone(), None, bs=1, noise=0, std=None) self.gn.eval() # with torch.no_grad(): G_out = self.gn(self.in_normalizer.normalize(self.G1)) for node in G_out.nodes(): delta_state[0, [node, node + 2]] = G_out.nodes[node]['feat'][0].cpu() last_state += delta_state return last_state def visualize_point(self, state): x2 = self.LENGTH * np.cos(state[self.STATE_THETA_1] - np.pi / 2) + self.LENGTH * np.cos( state[self.STATE_THETA_1] + state[self.STATE_THETA_2] - np.pi / 2) y2 = self.LENGTH * np.sin(state[self.STATE_THETA_1] - np.pi / 2) + self.LENGTH * np.sin( state[self.STATE_THETA_1] + state[self.STATE_THETA_2] - np.pi / 2) x1 = self.LENGTH * np.cos(state[self.STATE_THETA_1] - np.pi / 2) y1 = self.LENGTH * np.sin(state[self.STATE_THETA_1] - np.pi / 2) # x2 = (x + 2 * self.LENGTH) / (4 * self.LENGTH) # y2 = (y + 2 * self.LENGTH) / (4 * self.LENGTH) return x1, y1, x2, y2 def get_state_bounds(self): return [(self.MIN_ANGLE, self.MAX_ANGLE), (self.MIN_ANGLE, self.MAX_ANGLE), (self.MIN_V_1, self.MAX_V_1), (self.MIN_V_2, self.MAX_V_2)] def get_control_bounds(self): return [(self.MIN_TORQUE, self.MAX_TORQUE)]
# evaluation loop, done every epoch for data in tqdm(dl_eval): action, delta_state, last_state = data action, delta_state, last_state = action.float(), \ delta_state.float(), last_state.float() if use_cuda: action, delta_state, last_state = action.cuda(),\ delta_state.cuda(), last_state.cuda() init_graph_features(G1, graph_feat_size, node_feat_size, edge_feat_size, cuda=True, bs=200) load_graph_features(G1, action, last_state, delta_state, bs=200, noise=0) gn.eval() G_out = gn(G1)#gn(in_normalizer.normalize(G1)) init_graph_features(G_target, graph_feat_size, node_feat_size, edge_feat_size, cuda=True, bs=200) load_graph_features(G_target, action, delta_state, None, bs=200, norm=False, noise=0) # G_target_normalized = out_normalizer.normalize(G_target, False) # loss = get_graph_loss(out_normalizer.normalize(G_out, False), G_target_normalized) loss = get_graph_loss(G_out, G_target, opt.tstep) sum_loss += loss.data.item() itr += 1 writer.add_scalar('loss_eval', sum_loss / float(itr), step)