def main(grid):
    """Roll out a pre-trained VIN policy on ``grid`` and return its path.

    The number of value iterations, the image size and the weights file are
    chosen from the grid dimensions; they become the *defaults* of a
    command-line parser, so ``--weights``, ``--imsize``, ``--k`` etc. can
    still override them.

    Args:
        grid: Map object. This function reads ``grid.size.width``,
            ``grid.size.height``, ``grid.grid`` (indexed ``[y][x]``),
            ``grid.goal.position.{x,y}`` and ``grid.agent.position.{x,y}``.
            ``grid.grid`` is modified in place: the agent and goal cells are
            set to 0.

    Returns:
        A float array of shape ``(L, 2)`` holding the predicted
        ``[row, col]`` position at every step, where ``L`` is twice the
        length of the reference trajectory returned by
        ``sample_trajectory``. Once the goal is reached the remaining rows
        repeat the goal position. The array is returned whether or not the
        goal was reached. ``None`` is returned when no reference trajectory
        with more than one state was produced.

    Side effects:
        Rebinds the module-level ``data`` to an empty list, prints ``grid``
        and prints ``'success!'`` when the goal is reached.
    """
    n_domains = 1
    n_traj = 1
    # These were written with a trailing comma, which silently produced the
    # tuples (30,) and (None,) instead of the intended scalars.
    max_obs = 30  # max number of obstacles
    max_obs_size = None
    gen = False

    # (grid size, value iterations, weights file), checked in this order.
    # NOTE(review): 100x100 grids reuse the 64x64 weights file - confirm
    # that this is intentional.
    presets = (
        (100, 48, 'trained/30k_no_block_dataset_vin_64x64.pth'),
        (64, 48, 'trained/30k_no_block_dataset_vin_64x64.pth'),
        (28, 36, 'trained/60k_no_block_att3_vin_28x28.pth'),
        (16, 20, 'trained/60k_no_block_att3_vin_16x16.pth'),
        (8, 10, 'trained/60k_no_block_att3_vin_8x8.pth'),
    )
    for size, n_iterations, weights_path in presets:
        if grid.size.width == size or grid.size.height == size:
            k = n_iterations
            im_size = size
            training_file = weights_path
            break
    else:
        # Unknown size: fall back to the 64x64 weights at the grid's width.
        k = 48
        im_size = grid.size.width
        training_file = 'trained/30k_no_block_dataset_vin_64x64.pth'

    # Parsing training parameters.
    # NOTE(review): parse_args() reads sys.argv, so options of a host
    # application that are unknown to this parser will make it exit.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--weights',
        type=str,
        default=training_file,
        help='Path to trained weights')
    parser.add_argument('--plot', action='store_true', default=False)
    parser.add_argument('--gen', action='store_true', default=False)
    parser.add_argument(
        '--imsize', type=int, default=im_size, help='Size of image')
    parser.add_argument(
        '--k', type=int, default=k, help='Number of Value Iterations')
    parser.add_argument(
        '--l_i',
        type=int,
        default=2,
        help='Number of channels in input layer')
    parser.add_argument(
        '--l_h',
        type=int,
        default=150,
        help='Number of channels in first hidden layer')
    parser.add_argument(
        '--l_q',
        type=int,
        default=10,
        help='Number of channels in q layer (~actions) in VI-module')
    config = parser.parse_args()

    use_GPU = torch.cuda.is_available()
    # Instantiate a VIN model
    vin = VIN(config)
    # Load model parameters. Storages are mapped to the CPU first so that
    # weights saved from a GPU run also load on a CPU-only machine.
    vin.load_state_dict(
        torch.load(config.weights,
                   map_location=lambda storage, loc: storage))
    # Use GPU if available
    if use_GPU:
        vin = vin.cuda()

    counter = 0
    global data
    data = []

    # Stays None when no trajectory with more than one state is sampled, so
    # the final return cannot hit an unbound local.
    pred_traj = None

    for dom in range(n_domains):
        if gen:
            goal = [
                np.random.randint(config.imsize),
                np.random.randint(config.imsize)
            ]
            obs = obstacles([config.imsize, config.imsize], goal,
                            max_obs_size)
            # Add obstacles to map
            n_obs = obs.add_n_rand_obs(max_obs)
            # Add border to map
            border_res = obs.add_border()
            # Ensure we have valid map
            if n_obs == 0 or not border_res:
                continue
            start = None
        else:
            print(grid)
            mp = grid.grid
            goal = [grid.goal.position.x, grid.goal.position.y]
            start = [grid.agent.position.x, grid.agent.position.y]
            # Set the start and goal positions as free space too
            mp[start[1]][start[0]] = 0
            mp[goal[1]][goal[0]] = 0
            # Swap to [row, col] order (x is the column, not the row)
            goal = [goal[1], goal[0]]
            start = [start[1], start[0]]
            obs = obstacles([config.imsize, config.imsize], goal,
                            max_obs_size)
            # Use the supplied grid as the obstacle domain
            obs.dom = mp

        # Get final map. Original author's note: between mp and im the 0s
        # should become 1s and the 1s should become 0s.
        im = obs.get_final()
        # Generate gridworld from obstacle map
        G = gridworld(im, goal[0], goal[1])
        # Get value prior
        value_prior = G.get_reward_prior()
        # Reference trajectories to the goal (original note: dijkstra)
        states_xy, states_one_hot = sample_trajectory(G, n_traj, start, gen)

        if gen and len(states_xy[0]) > 0:
            # This saves the maps
            save_image(G.image, (goal[0], goal[1]), states_xy[0][0],
                       states_xy, states_one_hot, counter)
            counter += 1

        for i in range(n_traj):
            if len(states_xy[i]) > 1:
                # Allow twice the reference number of steps to reach goal
                L = len(states_xy[i]) * 2
                # Allocate space for predicted steps
                pred_traj = np.zeros((L, 2))
                # Set starting position
                pred_traj[0, :] = states_xy[i][0, :]

                # The map / value-prior input is the same at every step, so
                # build it once instead of once per step. ``int`` replaces
                # the ``np.int`` alias that NumPy 1.24 removed.
                im_data = 1 - G.image.astype(int)
                im_data = im_data.reshape(1, 1, config.imsize,
                                          config.imsize)
                value_data = value_prior.astype(int)
                value_data = value_data.reshape(1, 1, config.imsize,
                                                config.imsize)
                X_in = torch.from_numpy(
                    np.append(im_data, value_data, axis=1)).float()
                if use_GPU:
                    X_in = X_in.cuda()
                X_in = Variable(X_in)

                for j in range(1, L):
                    # Current state as integer [row, col]
                    state_data = pred_traj[j - 1, :].astype(int)
                    S1_in = torch.from_numpy(state_data[0].reshape(
                        [1, 1])).float()
                    S2_in = torch.from_numpy(state_data[1].reshape(
                        [1, 1])).float()
                    # Send Tensors to GPU if available
                    if use_GPU:
                        S1_in = S1_in.cuda()
                        S2_in = S2_in.cuda()
                    S1_in, S2_in = Variable(S1_in), Variable(S2_in)
                    # Forward pass in our neural net
                    _, predictions = vin(X_in, S1_in, S2_in, config)
                    _, indices = torch.max(
                        predictions.cpu(), 1, keepdim=True)
                    a = indices.data.numpy()[0][0]
                    # Transform prediction to indices
                    s = G.map_ind_to_state(pred_traj[j - 1, 0],
                                           pred_traj[j - 1, 1])
                    ns = G.sample_next_state(s, a)
                    nr, nc = G.get_coords(ns)
                    pred_traj[j, 0] = nr
                    pred_traj[j, 1] = nc
                    if nr == goal[0] and nc == goal[1]:
                        # We hit goal so fill remaining steps
                        pred_traj[j + 1:, 0] = nr
                        pred_traj[j + 1:, 1] = nc
                        break

                if (pred_traj[-1, 0] == goal[0]
                        and pred_traj[-1, 1] == goal[1]):
                    print('success!')
                    return pred_traj
    return pred_traj
def main(config,
         n_domains=100,
         max_obs=30,
         max_obs_size=None,
         n_traj=1,
         n_actions=8):
    """Measure VIN rollout accuracy on randomly generated obstacle maps.

    For every valid random domain, reference trajectories are sampled with
    ``sample_trajectory`` and the network is rolled out greedily from each
    trajectory's first state for at most twice the reference length. A
    rollout counts as correct when its last position equals the goal.
    Progress and the final accuracy are written to stdout.

    Args:
        config: Options object; this function reads ``config.weights``,
            ``config.imsize`` and ``config.plot`` and also passes the object
            to ``VIN`` and to the network's forward call.
        n_domains: Number of random domains to attempt; invalid maps are
            skipped, not retried.
        max_obs: Forwarded to ``obs.add_n_rand_obs``.
        max_obs_size: Forwarded to the ``obstacles`` constructor.
        n_traj: Number of trajectories requested per domain.
        n_actions: Unused; kept for interface compatibility.

    Returns:
        None.
    """
    # Correct vs total:
    correct, total = 0.0, 0.0
    # Automatic switch of GPU mode if available
    use_GPU = torch.cuda.is_available()
    # Instantiate a VIN model
    vin = VIN(config)
    # Load model parameters. Storages are mapped to the CPU first so that
    # weights saved from a GPU run also load on a CPU-only machine.
    vin.load_state_dict(
        torch.load(config.weights,
                   map_location=lambda storage, loc: storage))
    # Use GPU if available
    if use_GPU:
        vin = vin.cuda()

    for dom in range(n_domains):
        # Randomly select goal position
        goal = [
            np.random.randint(config.imsize),
            np.random.randint(config.imsize)
        ]
        # Generate obstacle map
        obs = obstacles([config.imsize, config.imsize], goal, max_obs_size)
        # Add obstacles to map
        n_obs = obs.add_n_rand_obs(max_obs)
        # Add border to map
        border_res = obs.add_border()
        # Ensure we have valid map
        if n_obs == 0 or not border_res:
            continue
        # Get final map
        im = obs.get_final()
        # Generate gridworld from obstacle map
        G = gridworld(im, goal[0], goal[1])
        # Get value prior
        value_prior = G.get_reward_prior()
        # Sample random trajectories to our goal
        states_xy, states_one_hot = sample_trajectory(G, n_traj)

        for i in range(n_traj):
            if len(states_xy[i]) > 1:
                # Allow twice the reference number of steps to reach goal
                L = len(states_xy[i]) * 2
                # Allocate space for predicted steps
                pred_traj = np.zeros((L, 2))
                # Set starting position
                pred_traj[0, :] = states_xy[i][0, :]

                # The map / value-prior input is the same at every step, so
                # build it once instead of once per step. ``int`` replaces
                # the ``np.int`` alias that NumPy 1.24 removed.
                im_data = 1 - G.image.astype(int)
                im_data = im_data.reshape(1, 1, config.imsize,
                                          config.imsize)
                value_data = value_prior.astype(int)
                value_data = value_data.reshape(1, 1, config.imsize,
                                                config.imsize)
                X_in = torch.from_numpy(
                    np.append(im_data, value_data, axis=1)).float()
                if use_GPU:
                    X_in = X_in.cuda()
                X_in = Variable(X_in)

                for j in range(1, L):
                    # Current state as integer coordinates
                    state_data = pred_traj[j - 1, :].astype(int)
                    S1_in = torch.from_numpy(state_data[0].reshape(
                        [1, 1])).float()
                    S2_in = torch.from_numpy(state_data[1].reshape(
                        [1, 1])).float()
                    # Send Tensors to GPU if available
                    if use_GPU:
                        S1_in = S1_in.cuda()
                        S2_in = S2_in.cuda()
                    S1_in, S2_in = Variable(S1_in), Variable(S2_in)
                    # Forward pass in our neural net
                    _, predictions = vin(X_in, S1_in, S2_in, config)
                    _, indices = torch.max(
                        predictions.cpu(), 1, keepdim=True)
                    a = indices.data.numpy()[0][0]
                    # Transform prediction to indices
                    s = G.map_ind_to_state(pred_traj[j - 1, 0],
                                           pred_traj[j - 1, 1])
                    ns = G.sample_next_state(s, a)
                    nr, nc = G.get_coords(ns)
                    pred_traj[j, 0] = nr
                    pred_traj[j, 1] = nc
                    if nr == goal[0] and nc == goal[1]:
                        # We hit goal so fill remaining steps
                        pred_traj[j + 1:, 0] = nr
                        pred_traj[j + 1:, 1] = nc
                        break

                # The rollout is correct when it ends on the goal
                if (pred_traj[-1, 0] == goal[0]
                        and pred_traj[-1, 1] == goal[1]):
                    correct += 1
                total += 1
                # Plot optimal and predicted path (also start, end)
                if config.plot:
                    visualize(G.image.T, states_xy[i], pred_traj)

        sys.stdout.write("\r" + str(int(
            (float(dom) / n_domains) * 100.0)) + "%")
        sys.stdout.flush()
    sys.stdout.write("\n")

    if total:
        print('Rollout Accuracy: {:.2f}%'.format(100 * (correct / total)))
    else:
        # Every domain was skipped or had no usable trajectory; dividing by
        # ``total`` would raise ZeroDivisionError.
        print('Rollout Accuracy: n/a (no trajectories were evaluated)')
def make_data(dom_size, n_domains, max_obs, max_obs_size, n_traj,
              state_batch_size):
    """Generate VIN training samples from random obstacle domains.

    Random domains are generated until ``n_domains`` valid ones have been
    processed. For each one, ``n_traj`` trajectories are sampled and every
    trajectory with more than one state contributes one sample per
    transition (all states except the last).

    Args:
        dom_size: Pair ``(dom_size[0], dom_size[1])`` giving the domain
            dimensions.
        n_domains: Number of valid domains to process.
        max_obs: Forwarded to ``obs.add_n_rand_obs``.
        max_obs_size: Forwarded to the ``obstacles`` constructor.
        n_traj: Number of trajectories requested per domain.
        state_batch_size: Unused; kept for interface compatibility.

    Returns:
        Tuple ``(X, S1, S2, Labels)`` of arrays concatenated along axis 0.
        ``X`` has shape ``(N, 2, dom_size[0], dom_size[1])``: channel 0 is
        the inverted domain image and channel 1 the value prior. ``S1`` and
        ``S2`` have shape ``(N, 1)`` and hold columns 0 and 1 of the visited
        states. ``Labels`` holds the output of ``extract_action`` with a
        trailing axis added.

    Raises:
        ValueError: If no training sample was produced.
    """
    X_l = []
    S1_l = []
    S2_l = []
    Labels_l = []

    # Count valid domains. The original ``dom <= n_domains`` test produced
    # one domain too many and pushed the progress figure past 100%.
    # NOTE(review): invalid maps are retried without a limit, so this loops
    # forever if the generator can never produce a valid map.
    dom = 0
    while dom < n_domains:
        goal = [np.random.randint(dom_size[0]), np.random.randint(dom_size[1])]
        # Generate obstacle map
        obs = obstacles([dom_size[0], dom_size[1]], goal, max_obs_size)
        # Add obstacles to map
        n_obs = obs.add_n_rand_obs(max_obs)
        # Add border to map
        border_res = obs.add_border()
        # Ensure we have valid map; invalid ones are retried, not counted
        if n_obs == 0 or not border_res:
            continue
        # Get final map
        im = obs.get_final()
        # Generate gridworld from obstacle map
        G = gridworld(im, goal[0], goal[1])
        # Get value prior
        value_prior = G.t_get_reward_prior()
        # Sample random trajectories to our goal
        states_xy, states_one_hot = sample_trajectory(G, n_traj)

        # The image / value-prior stack is the same for every trajectory of
        # this domain, so build it once.
        # Invert domain image => 0 = free, 1 = obstacle
        image = 1 - im
        # Resize domain and goal images and concatenate
        image_data = np.resize(image, (1, 1, dom_size[0], dom_size[1]))
        value_data = np.resize(value_prior,
                               (1, 1, dom_size[0], dom_size[1]))
        iv_mixed = np.concatenate((image_data, value_data), axis=1)

        for i in range(n_traj):
            if len(states_xy[i]) > 1:
                # Get optimal actions for each state
                actions = extract_action(states_xy[i])
                # One sample per transition: the final state has no action
                ns = states_xy[i].shape[0] - 1
                X_current = np.tile(iv_mixed, (ns, 1, 1, 1))
                # Resize states
                S1_current = np.expand_dims(states_xy[i][0:ns, 0], axis=1)
                S2_current = np.expand_dims(states_xy[i][0:ns, 1], axis=1)
                # Resize labels
                Labels_current = np.expand_dims(actions, axis=1)
                # Append to output list
                X_l.append(X_current)
                S1_l.append(S1_current)
                S2_l.append(S2_current)
                Labels_l.append(Labels_current)

        dom += 1
        sys.stdout.write("\r" + str(int(
            (float(dom) / n_domains) * 100)) + "%")
        sys.stdout.flush()
    sys.stdout.write("\n")

    if not X_l:
        # np.concatenate would fail with a much less helpful message.
        raise ValueError(
            'make_data produced no samples (n_domains={!r})'.format(
                n_domains))

    # Concat all outputs
    X_f = np.concatenate(X_l)
    S1_f = np.concatenate(S1_l)
    S2_f = np.concatenate(S2_l)
    Labels_f = np.concatenate(Labels_l)
    return X_f, S1_f, S2_f, Labels_f
def X2R(self,X):
    """Derive the reward map and goal cell from a two-channel input ``X``.

    The goal is the position of the maximum of ``X[1]``, converted from a
    flat index to ``[row, col]`` using the module-level ``config.imsize``
    as the row length. The reward is
    ``X[1] - 0.02 * (1 - X[0]) - 2 * X[0]``.

    NOTE(review): presumably ``X[0]`` is the obstacle mask (1 = obstacle)
    and ``X[1]`` the goal / value prior, giving a 0.02 step cost on free
    cells and a penalty of 2 on obstacles - confirm against the caller.

    Args:
        X: Indexable with at least two entries of matching shape.

    Returns:
        Tuple ``(R, goal)``: the reward array and the ``[row, col]`` goal.
    """
    # Locate the maximum once; the flat index is split using the row length.
    flat_goal = np.argmax(X[1])
    goal = [flat_goal // config.imsize, flat_goal % config.imsize]
    # The original also built a gridworld here and discarded it; that dead
    # construction has been dropped.
    R = X[1] - (1 - X[0]) * 0.02 - 2 * X[0]
    return R, goal
def __init__(self, X):
    """Set up the reward map, goal, gridworld and action table from ``X``.

    ``self.R`` and ``self.goal`` come from ``self.X2R(X)``; ``self.G`` is a
    ``gridworld`` built from ``1 - X[0]`` and the goal coordinates.
    """
    reward, goal = self.X2R(X)
    self.R = reward
    self.goal = goal
    inverted_first_channel = 1 - X[0]
    self.G = gridworld(inverted_first_channel, self.goal[0], self.goal[1])
    # Eight 2-D unit moves: four axis-aligned, then four diagonal.
    # NOTE(review): presumably (d_row, d_col) offsets - confirm.
    moves = [
        [-1, 0],
        [1, 0],
        [0, 1],
        [0, -1],
        [-1, 1],
        [-1, -1],
        [1, 1],
        [1, -1],
    ]
    self.actions = np.asarray(moves)
def main(config,
         n_domains=100,
         max_obs=30,
         max_obs_size=None,
         n_traj=1,
         n_actions=8):
    """Measure VIN rollout accuracy on randomly generated obstacle maps.

    For every valid random domain, reference trajectories are sampled with
    ``sample_trajectory`` and the network is rolled out greedily from each
    trajectory's first state for at most twice the reference length. A
    rollout counts as correct when its last position equals the goal.
    Progress and the final accuracy are written to stdout.

    Args:
        config: Options object; this function reads ``config.weights``,
            ``config.imsize`` and ``config.plot`` and also passes the object
            to ``VIN`` and to the network's forward call.
        n_domains: Number of random domains to attempt; invalid maps are
            skipped, not retried.
        max_obs: Forwarded to ``obs.add_n_rand_obs``.
        max_obs_size: Forwarded to the ``obstacles`` constructor.
        n_traj: Number of trajectories requested per domain.
        n_actions: Unused; kept for interface compatibility.

    Returns:
        None.
    """
    # Correct vs total:
    correct, total = 0.0, 0.0
    # Automatic switch of GPU mode if available
    use_GPU = torch.cuda.is_available()
    # Instantiate a VIN model
    vin = VIN(config)
    # Load model parameters. Storages are mapped to the CPU first so that
    # weights saved from a GPU run also load on a CPU-only machine.
    vin.load_state_dict(
        torch.load(config.weights,
                   map_location=lambda storage, loc: storage))
    # Use GPU if available
    if use_GPU:
        vin = vin.cuda()

    for dom in range(n_domains):
        # Randomly select goal position
        goal = [
            np.random.randint(config.imsize),
            np.random.randint(config.imsize)
        ]
        # Generate obstacle map
        obs = obstacles([config.imsize, config.imsize], goal, max_obs_size)
        # Add obstacles to map
        n_obs = obs.add_n_rand_obs(max_obs)
        # Add border to map
        border_res = obs.add_border()
        # Ensure we have valid map
        if n_obs == 0 or not border_res:
            continue
        # Get final map
        im = obs.get_final()
        # Generate gridworld from obstacle map
        G = gridworld(im, goal[0], goal[1])
        # Get value prior
        value_prior = G.get_reward_prior()
        # Sample random trajectories to our goal
        states_xy, states_one_hot = sample_trajectory(G, n_traj)

        for i in range(n_traj):
            if len(states_xy[i]) > 1:
                # Allow twice the reference number of steps to reach goal
                L = len(states_xy[i]) * 2
                # Allocate space for predicted steps
                pred_traj = np.zeros((L, 2))
                # Set starting position
                pred_traj[0, :] = states_xy[i][0, :]

                # The map / value-prior input is the same at every step, so
                # build it once instead of once per step. ``int`` replaces
                # the ``np.int`` alias that NumPy 1.24 removed.
                im_data = 1 - G.image.astype(int)
                im_data = im_data.reshape(1, 1, config.imsize,
                                          config.imsize)
                value_data = value_prior.astype(int)
                value_data = value_data.reshape(1, 1, config.imsize,
                                                config.imsize)
                X_in = torch.from_numpy(
                    np.append(im_data, value_data, axis=1)).float()
                if use_GPU:
                    X_in = X_in.cuda()
                X_in = Variable(X_in)

                for j in range(1, L):
                    # Current state as integer coordinates
                    state_data = pred_traj[j - 1, :].astype(int)
                    S1_in = torch.from_numpy(state_data[0].reshape(
                        [1, 1])).float()
                    S2_in = torch.from_numpy(state_data[1].reshape(
                        [1, 1])).float()
                    # Send Tensors to GPU if available
                    if use_GPU:
                        S1_in = S1_in.cuda()
                        S2_in = S2_in.cuda()
                    S1_in, S2_in = Variable(S1_in), Variable(S2_in)
                    # Forward pass in our neural net
                    _, predictions = vin(X_in, S1_in, S2_in, config)
                    _, indices = torch.max(
                        predictions.cpu(), 1, keepdim=True)
                    a = indices.data.numpy()[0][0]
                    # Transform prediction to indices
                    s = G.map_ind_to_state(pred_traj[j - 1, 0],
                                           pred_traj[j - 1, 1])
                    ns = G.sample_next_state(s, a)
                    nr, nc = G.get_coords(ns)
                    pred_traj[j, 0] = nr
                    pred_traj[j, 1] = nc
                    if nr == goal[0] and nc == goal[1]:
                        # We hit goal so fill remaining steps
                        pred_traj[j + 1:, 0] = nr
                        pred_traj[j + 1:, 1] = nc
                        break

                # The rollout is correct when it ends on the goal
                if (pred_traj[-1, 0] == goal[0]
                        and pred_traj[-1, 1] == goal[1]):
                    correct += 1
                total += 1
                # Plot optimal and predicted path (also start, end)
                if config.plot:
                    visualize(G.image.T, states_xy[i], pred_traj)

        sys.stdout.write("\r" + str(int(
            (float(dom) / n_domains) * 100.0)) + "%")
        sys.stdout.flush()
    sys.stdout.write("\n")

    if total:
        print('Rollout Accuracy: {:.2f}%'.format(100 * (correct / total)))
    else:
        # Every domain was skipped or had no usable trajectory; dividing by
        # ``total`` would raise ZeroDivisionError.
        print('Rollout Accuracy: n/a (no trajectories were evaluated)')