def gen_L(grid_width, grid_height, path='L_expert_trajectories'):
    ''' Build in-memory L-shaped expert trajectories (one right turn each).

    For each of 50 sampled start states, one trajectory is generated per
    initial action index (0..3). A trajectory consists of 2 legs of 3 steps;
    the second leg uses the action obtained from the right-turn table below.

    Args:
        grid_width: width passed to create_obstacles / TransitionFunction.
        grid_height: height passed to create_obstacles / TransitionFunction.
        path: not used by this variant; kept for signature compatibility.

    Returns:
        Tuple (env_data_dict, expert_data_dict, obstacles, set_diff) where
        expert_data_dict maps '<traj>_<action>' to a dict with the lists
        'state', 'action' and 'goal' (goal is recorded as the action delta).

    Raises:
        ValueError: if the previous action delta is not one of 0, 1, 2, 3
            when the turn is taken.
    '''
    steps_per_leg = 3
    num_legs = 2
    num_traj = 50
    # Number of goals is the same as number of actions
    num_actions, num_goals = 4, 4
    # delta before the turn -> delta after the right turn
    right_turn = {0: 3, 1: 2, 2: 0, 3: 1}

    obstacles = create_obstacles(grid_width, grid_height)
    inner_cells = set(product(tuple(range(3, grid_width - 3)),
                              tuple(range(3, grid_height - 3))))
    set_diff = list(inner_cells - set(obstacles))
    T = TransitionFunction(grid_width, grid_height, obstacle_movement)

    env_data_dict = {'num_actions': num_actions, 'num_goals': num_goals}
    expert_data_dict = {}

    for traj_idx in range(num_traj):
        # One start state is shared by the four trajectories of this index.
        start_state = State(sample_start(set_diff), obstacles)
        for action_idx in range(num_actions):
            traj = {'state': [], 'action': [], 'goal': []}
            expert_data_dict['{}_{}'.format(traj_idx, action_idx)] = traj

            state = start_state
            for leg in range(num_legs):
                if leg == 0:
                    # Set initial direction
                    action = Action(action_idx)
                elif action.delta in right_turn:
                    action = Action(right_turn[action.delta])
                else:
                    raise ValueError("Invalid action delta {}".format(
                        action.delta))

                for _ in range(steps_per_leg):
                    traj['state'].append(state.state)
                    traj['action'].append(action.delta)
                    traj['goal'].append(action.delta)
                    # The leg index is passed as the third argument of T.
                    state = T(state, action, leg)

    return env_data_dict, expert_data_dict, obstacles, set_diff
def gen_L(grid_width, grid_height, path='L_expert_trajectories'):
    ''' Generates trajectories of shape L, with right turn

    Writes 200 text files named '<i>.txt' into `path` (the directory is
    created when missing). Each trajectory has 2 legs of 3 steps: the first
    leg uses a randomly chosen action in 0..3 from a sampled start state, the
    second leg uses the action given by the right-turn table below.

    For every step three lines are written:
        1. the state values, space separated;
        2. oned_to_onehot(action.delta, 4), space separated (the action);
        3. the same one-hot line again (the c[t] value).

    Args:
        grid_width: width passed to create_obstacles / TransitionFunction.
        grid_height: height passed to create_obstacles / TransitionFunction.
        path: output directory for the trajectory files.

    Returns:
        None. The trajectories are only written to disk.
    '''
    t = 3  # steps per leg
    n = 2  # legs per trajectory
    N = 200  # number of trajectory files
    # delta before the turn -> delta after the right turn
    right_turn = {0: 3, 1: 2, 2: 0, 3: 1}

    obstacles = create_obstacles(grid_width, grid_height)
    set_diff = list(
        set(
            product(tuple(range(3, grid_width - 3)),
                    tuple(range(3, grid_height - 3)))) - set(obstacles))

    if not os.path.exists(path):
        os.makedirs(path)

    T = TransitionFunction(grid_width, grid_height, obstacle_movement)

    for i in range(N):
        filename = os.path.join(path, str(i) + '.txt')
        # The context manager closes the file even when a step raises.
        with open(filename, 'w') as f:
            for j in range(n):
                if j == 0:
                    action = Action(random.choice(range(0, 4)))
                    state = State(sample_start(set_diff), obstacles)
                elif action.delta in right_turn:
                    # take right turn; any other delta keeps the action as-is
                    action = Action(right_turn[action.delta])

                # The action is constant for the whole leg, so its one-hot
                # line is built once and reused for both per-step lines.
                onehot_line = ' '.join(
                    str(e) for e in oned_to_onehot(action.delta, 4)) + '\n'

                for k in range(t):
                    f.write(' '.join(str(e) for e in state.state) +
                            '\n')  # write state
                    f.write(onehot_line)  # write action
                    f.write(onehot_line)  # write c[t]s
                    state = T(state, action, j)
def gen_sq_rec(grid_width, grid_height, path='SR_expert_trajectories'):
    ''' Generates squares if starting in quadrants 1 and 4, and rectangles
    if starting in quadrants 2 and 3

    Writes 200 text files named '<i>.txt' into `path` (the directory is
    created when missing). For each file a half of the grid is chosen at
    random and a start location is sampled from it:

    * start x >= grid_width // 2: a 2x2 square, 4 legs of 2 steps, with the
      action delta sequence 3, 1, 2, 0;
    * otherwise: a 3x1 rectangle, legs of 1, 3, 1, 3 steps, with the action
      delta sequence 1, 3, 0, 2.

    For every step three lines are written: the state values, the one-hot of
    the action delta (oned_to_onehot(delta, 4)), and that one-hot again as
    the c[t] value.

    Args:
        grid_width: width passed to create_obstacles / TransitionFunction.
        grid_height: height passed to create_obstacles / TransitionFunction.
        path: output directory for the trajectory files.

    Returns:
        None. The trajectories are only written to disk.
    '''
    N = 200
    # Floor division keeps this an int, so it is usable in range() on
    # Python 3 and the quadrant test below agrees with the sampling ranges.
    half_width = grid_width // 2

    # delta of the current leg -> delta of the next leg
    square_next = {3: 1, 1: 2, 2: 0}
    rectangle_next = {1: 3, 3: 0, 0: 2}

    obstacles = create_obstacles(grid_width, grid_height)

    if not os.path.exists(path):
        os.makedirs(path)

    T = TransitionFunction(grid_width, grid_height, obstacle_movement)

    def write_step(out, state, action):
        # One step = state line, action one-hot line, c[t] one-hot line.
        out.write(' '.join([str(e) for e in state.state]) +
                  '\n')  # write state
        out.write(' '.join(
            [str(e) for e in oned_to_onehot(action.delta, 4)]) +
                  '\n')  # write action
        out.write(' '.join(
            [str(e) for e in oned_to_onehot(action.delta, 4)]) +
                  '\n')  # write c[t]s

    for i in range(N):
        filename = os.path.join(path, str(i) + '.txt')
        # The context manager closes the file even when sampling or a
        # transition raises.
        with open(filename, 'w') as f:
            half = random.choice(range(0, 2))
            if half == 0:  # left half
                set_diff = list(
                    set(
                        product(tuple(range(0, half_width - 3)),
                                tuple(range(1, grid_height)))) -
                    set(obstacles))
            else:  # right half
                set_diff = list(
                    set(
                        product(tuple(range(half_width, grid_width - 2)),
                                tuple(range(2, grid_height)))) -
                    set(obstacles))
            start_loc = sample_start(set_diff)
            state = State(start_loc, obstacles)

            if start_loc[0] >= half_width:  # quadrants 1 and 4
                # generate 2x2 square clockwise
                t = 2
                n = 4
                delta = 3
                for j in range(n):
                    for k in range(t):
                        action = Action(delta)
                        write_step(f, state, action)
                        state = T(state, action, j * 2 + k)
                    # After the last leg (delta 0) the delta is left as-is.
                    delta = square_next.get(delta, delta)
            else:  # quadrants 2 and 3
                # generate 3x1 rectangle anti-clockwise
                t = [1, 3, 1, 3]
                delta = 1
                for j in range(len(t)):
                    for k in range(t[j]):
                        action = Action(delta)
                        write_step(f, state, action)
                        # Third argument is the step count so far.
                        state = T(state, action, sum(t[0:j]) + k)
                    # After the last leg (delta 2) the delta is left as-is.
                    delta = rectangle_next.get(delta, delta)
def gen_diverse_trajs(grid_width, grid_height):
    '''Generate diverse trajectories in a 21x21 grid with 4 goals.

    20 start states are sampled from the cells (7..12, 7..12) that are not
    obstacles. From each start state one trajectory is generated per goal:

    * goals 0 and 2 (path 1): vertical leg until a y boundary, then a
      horizontal leg until an x boundary;
    * goal 1 (path 2): horizontal leg first, then the vertical leg;
    * goal 3 (path 3): diagonal moves (action delta g + 4) until the
      transition function no longer changes the coordinates, then the two
      straight legs in an order that depends on whether Action(2) or
      Action(3) is blocked at that point.

    The state reached at the end of a trajectory is asserted to be one of
    the goals; it is not itself appended to the recorded states.

    Return:
        Tuple (env_data_dict, expert_data_dict, obstacles, set_diff).
        expert_data_dict maps '<start>_<goal>_<path>.txt' to a dict with the
        lists 'state', 'action' and 'goal'.
    '''
    assert grid_width == 21 and grid_height == 21, "Incorrect grid width height"

    num_starts = 20
    goals = [(0, 0), (20, 20), (20, 0), (0, 20)]
    n_goals = len(goals)
    obstacles = create_obstacles(21, 21, 'diverse')
    T = TransitionFunction(grid_width, grid_height, obstacle_movement)
    centre_cells = set(product(tuple(range(7, 13)), tuple(range(7, 13))))
    set_diff = list(centre_cells - set(obstacles))

    expert_data_dict = {}
    env_data_dict = {
        'num_actions': 8,
        'num_goals': n_goals,
        'goals': np.array(goals),
    }

    def new_trajectory(start_idx, goal_idx, path_id):
        # Register an empty trajectory under its filename-like key.
        traj = {'state': [], 'action': [], 'goal': []}
        key = '{}_{}_{}.txt'.format(start_idx, goal_idx, path_id)
        expert_data_dict[key] = traj
        return traj

    def record(traj, state, action, goal_idx):
        # Append one (state, action, goal) sample to the trajectory.
        traj['state'].append(state.state)
        traj['action'].append(action.delta)
        traj['goal'].append(goal_idx)

    def walk_to_edge(traj, state, delta, axis, last_index, goal_idx):
        # Repeat Action(delta) until state.state[axis] is 0 or last_index.
        action = Action(delta)
        while not (state.state[axis] == last_index
                   or state.state[axis] == 0):
            record(traj, state, action, goal_idx)
            state = T(state, action, 0)
        return state

    for n in range(num_starts):
        start_state = State(sample_start(set_diff), obstacles)

        for g in range(n_goals):  # loop over goals
            # Each leg is (action delta, state axis, last index on that axis).
            vertical_leg = (0 if g < 2 else 1, 1, grid_height - 1)
            horizontal_leg = (3 if g in (0, 3) else 2, 0, grid_width - 1)
            state = start_state

            if g in (0, 2):
                # path 1 - go up/down till boundary and then move right/left
                traj = new_trajectory(n, g, 1)
                legs = (vertical_leg, horizontal_leg)
            elif g == 1:
                # path 2 - go right/left till boundary and then move up/down
                traj = new_trajectory(n, g, 2)
                legs = (horizontal_leg, vertical_leg)
            else:
                # path 3 (goal 3 only) - go diagonally till the move no
                # longer changes the coordinates, then finish with the two
                # straight legs.
                traj = new_trajectory(n, g, 3)
                diagonal = Action(g + 4)
                while True:
                    nxt = T(state, diagonal, 0)
                    if nxt.coordinates == state.coordinates:
                        break
                    record(traj, state, diagonal, g)
                    state = nxt

                # Actions 2 and 3 are the right/left moves per the path
                # descriptions above; if either is blocked here, the
                # vertical leg goes first.
                sideways_blocked = (
                    T(state, Action(2), 0).coordinates == state.coordinates
                    or T(state, Action(3), 0).coordinates == state.coordinates)
                if sideways_blocked:
                    legs = (vertical_leg, horizontal_leg)
                else:
                    legs = (horizontal_leg, vertical_leg)

            for delta, axis, last_index in legs:
                state = walk_to_edge(traj, state, delta, axis, last_index, g)

            assert (state.coordinates in goals)

    return env_data_dict, expert_data_dict, obstacles, set_diff
parser.add_argument(
    '--clip-epsilon',
    type=float,
    default=0.2,
    metavar='N',
    help='Clipping for PPO grad')
parser.add_argument(
    '--checkpoint',
    type=str,
    required=True,
    help='path to checkpoint')
args = parser.parse_args()

#-----Environment-----#
# Fixed 12x12 grid; start locations are drawn from the cells with both
# coordinates in 3..8 that are not obstacles.
width, height = 12, 12
obstacles = create_obstacles(width, height)
set_diff = list(
    set(product(tuple(range(3, width - 3)), repeat=2)) - set(obstacles))
start_loc = sample_start(set_diff)
s = State(start_loc, obstacles)
T = TransitionFunction(width, height, obstacle_movement)

# The SR2 expert data gets its own reward function, which also takes the
# grid width; every other expert path uses the plain RewardFunction.
R = (RewardFunction_SR2(-1.0, 1.0, width)
     if args.expert_path == 'SR2_expert_trajectories/'
     else RewardFunction(-1.0, 1.0))

num_inputs = s.state.shape[0]
num_actions = 4
# num_c is 2 for the SR2 expert data and 4 otherwise.
num_c = 2 if args.expert_path == 'SR2_expert_trajectories/' else 4

#env.seed(args.seed)
torch.manual_seed(args.seed)
def create_environment(self):
    '''Set the grid size to 21x21 and build the matching transition function.

    Stores `width`, `height` and `transition_func` on the instance.
    '''
    self.width = 21
    self.height = 21
    self.transition_func = TransitionFunction(
        self.width, self.height, obstacle_movement)