Python TransitionFunction示例，grid_world.TransitionFunction Python示例

示例#1

0

显示文件

def gen_L(grid_width, grid_height, path='L_expert_trajectories'):
    ''' Generates trajectories of shape L, with right turn '''
    t = 3
    n = 2
    num_traj = 50

    obstacles = create_obstacles(grid_width, grid_height)
    set_diff = list(set(product(tuple(range(3, grid_width-3)),
                                tuple(range(3, grid_height-3)))) \
                                        - set(obstacles))

    T = TransitionFunction(grid_width, grid_height, obstacle_movement)
    expert_data_dict = {}
    # Number of goals is the same as number of actions
    num_actions, num_goals = 4, 4
    env_data_dict = {'num_actions': num_actions, 'num_goals': num_goals}

    for i in range(num_traj):
        start_state = State(sample_start(set_diff), obstacles)
        for action_idx in range(num_actions):

            path_key = str(i) + '_' + str(action_idx)
            expert_data_dict[path_key] = {
                'state': [],
                'action': [],
                'goal': []
            }

            state = start_state

            for j in range(n):
                # Set initial direction
                if j == 0:
                    action = Action(action_idx)
                else:
                    if action.delta == 0:
                        action = Action(3)
                    elif action.delta == 1:
                        action = Action(2)
                    elif action.delta == 2:
                        action = Action(0)
                    elif action.delta == 3:
                        action = Action(1)
                    else:
                        raise ValueError("Invalid action delta {}".format(
                            action.delta))

                for k in range(t):
                    expert_data_dict[path_key]['state'].append(state.state)
                    expert_data_dict[path_key]['action'].append(action.delta)
                    expert_data_dict[path_key]['goal'].append(action.delta)
                    state = T(state, action, j)
        # print(expert_data_dict[path_key]['goal'])

    return env_data_dict, expert_data_dict, obstacles, set_diff

示例#2

0

显示文件

def gen_L(grid_width, grid_height, path='L_expert_trajectories'):
    ''' Generates trajectories of shape L, with right turn '''
    t = 3
    n = 2
    N = 200

    obstacles = create_obstacles(grid_width, grid_height)
    set_diff = list(
        set(
            product(tuple(range(3, grid_width -
                                3)), tuple(range(3, grid_height - 3)))) -
        set(obstacles))

    if not os.path.exists(path):
        os.makedirs(path)

    T = TransitionFunction(grid_width, grid_height, obstacle_movement)

    for i in range(N):
        filename = os.path.join(path, str(i) + '.txt')
        f = open(filename, 'w')
        for j in range(n):
            if j == 0:
                action = Action(random.choice(range(0, 4)))
                state = State(sample_start(set_diff), obstacles)
            else:  # take right turn
                if action.delta == 0:
                    action = Action(3)
                elif action.delta == 1:
                    action = Action(2)
                elif action.delta == 2:
                    action = Action(0)
                elif action.delta == 3:
                    action = Action(1)
            for k in range(t):
                f.write(' '.join([str(e)
                                  for e in state.state]) + '\n')  # write state
                f.write(
                    ' '.join([str(e)
                              for e in oned_to_onehot(action.delta, 4)]) +
                    '\n')  # write action
                f.write(
                    ' '.join([str(e)
                              for e in oned_to_onehot(action.delta, 4)]) +
                    '\n')  # write c[t]s
                state = T(state, action, j)

        f.close()

示例#3

0

显示文件

def gen_sq_rec(grid_width, grid_height, path='SR_expert_trajectories'):
    ''' Generates squares if starting in quadrants 1 and 4, and rectangles if starting in quadransts 2 and 3 '''
    N = 200

    obstacles = create_obstacles(grid_width, grid_height)

    if not os.path.exists(path):
        os.makedirs(path)

    T = TransitionFunction(grid_width, grid_height, obstacle_movement)

    for i in range(N):
        filename = os.path.join(path, str(i) + '.txt')
        f = open(filename, 'w')
        half = random.choice(range(0, 2))
        if half == 0:  # left half
            set_diff = list(
                set(
                    product(tuple(range(0, (grid_width / 2) -
                                        3)), tuple(range(1, grid_height)))) -
                set(obstacles))
            start_loc = sample_start(set_diff)
        elif half == 1:  # right half
            set_diff = list(
                set(
                    product(tuple(range(grid_width / 2, grid_width -
                                        2)), tuple(range(2, grid_height)))) -
                set(obstacles))
            start_loc = sample_start(set_diff)

        state = State(start_loc, obstacles)

        if start_loc[0] >= grid_width / 2:  # quadrants 1 and 4
            # generate 2x2 square clockwise
            t = 2
            n = 4
            delta = 3

            for j in range(n):
                for k in range(t):
                    action = Action(delta)
                    f.write(' '.join([str(e) for e in state.state]) +
                            '\n')  # write state
                    f.write(' '.join(
                        [str(e) for e in oned_to_onehot(action.delta, 4)]) +
                            '\n')  # write action
                    f.write(' '.join(
                        [str(e) for e in oned_to_onehot(action.delta, 4)]) +
                            '\n')  # write c[t]s
                    state = T(state, action, j * 2 + k)

                if delta == 3:
                    delta = 1
                elif delta == 1:
                    delta = 2
                elif delta == 2:
                    delta = 0

        else:  # quadrants 2 and 3
            # generate 3x1 rectangle anti-clockwise
            t = [1, 3, 1, 3]
            delta = 1

            for j in range(len(t)):
                for k in range(t[j]):
                    action = Action(delta)
                    f.write(' '.join([str(e) for e in state.state]) +
                            '\n')  # write state
                    f.write(' '.join(
                        [str(e) for e in oned_to_onehot(action.delta, 4)]) +
                            '\n')  # write action
                    f.write(' '.join(
                        [str(e) for e in oned_to_onehot(action.delta, 4)]) +
                            '\n')  # write c[t]s
                    state = T(state, action, sum(t[0:j]) + k)

                if delta == 1:
                    delta = 3
                elif delta == 3:
                    delta = 0
                elif delta == 0:
                    delta = 2

示例#4

0

显示文件

def gen_diverse_trajs(grid_width, grid_height):
    '''Generate diverse trajectories in a 21x21 grid with 4 goals.

    Return: Dictionary with keys as text filenames and values as dictionary.
        Each value dictionary contains two keys, 'states' with a list of states
        as value, and 'actions' with list of actions as value.
    '''

    assert grid_width == 21 and grid_height == 21, "Incorrect grid width height"
    N = 20
    goals = [(0, 0), (20, 20), (20, 0), (0, 20)]
    n_goals = len(goals)

    obstacles = create_obstacles(21, 21, 'diverse')

    T = TransitionFunction(grid_width, grid_height, obstacle_movement)

    set_diff = list(set(product(tuple(range(7,13)),tuple(range(7,13)))) \
            - set(obstacles))
    expert_data_dict = {}
    env_data_dict = {
        'num_actions': 8,
        'num_goals': n_goals,
        'goals': np.array(goals),
    }

    for n in range(N):

        start_state = State(sample_start(set_diff), obstacles)

        for g in range(n_goals):  # loop over goals
            # path 1 - go up/down till boundary and then move right/left

            if g == 0 or g == 2:  # do path 1 only for goal 0 and goal 2

                state = start_state
                path_key = str(n) + '_' + str(g) + '_' + str(1) + '.txt'
                expert_data_dict[path_key] = {
                    'state': [],
                    'action': [],
                    'goal': []
                }

                delta = 0 if g < 2 else 1
                action = Action(delta)

                while state.state[1] != grid_height - 1 and state.state[1] != 0:
                    expert_data_dict[path_key]['state'].append(state.state)
                    expert_data_dict[path_key]['action'].append(action.delta)
                    expert_data_dict[path_key]['goal'].append(g)
                    state = T(state, action, 0)

                delta = 3 if g == 0 or g == 3 else 2
                action = Action(delta)

                while state.state[0] != grid_width - 1 and state.state[0] != 0:
                    expert_data_dict[path_key]['state'].append(state.state)
                    expert_data_dict[path_key]['action'].append(action.delta)
                    expert_data_dict[path_key]['goal'].append(g)
                    state = T(state, action, 0)

                assert (state.coordinates in goals)

            # path 2 - go right/left till boundary and then move up/down

            if g == 1:  # do path 2 only for goal 1

                state = start_state
                path_key = str(n) + '_' + str(g) + '_' + str(2) + '.txt'
                expert_data_dict[path_key] = {
                    'state': [],
                    'action': [],
                    'goal': []
                }

                delta = 3 if g == 0 or g == 3 else 2
                action = Action(delta)

                while state.state[0] != grid_width - 1 and state.state[0] != 0:
                    expert_data_dict[path_key]['state'].append(state.state)
                    expert_data_dict[path_key]['action'].append(action.delta)
                    expert_data_dict[path_key]['goal'].append(g)
                    state = T(state, action, 0)

                delta = 0 if g < 2 else 1
                action = Action(delta)

                while state.state[1] != grid_height - 1 and state.state[1] != 0:
                    expert_data_dict[path_key]['state'].append(state.state)
                    expert_data_dict[path_key]['action'].append(action.delta)
                    expert_data_dict[path_key]['goal'].append(g)
                    state = T(state, action, 0)

                assert (state.coordinates in goals)

            # path 3 - go diagonally till obstacle and then
            #          move up/down if x > 10 or right/left if y > 10
            #          and then move right/left or up/down till goal

            if g == 3:  # do path 3 only for goal 3

                state = start_state
                path_key = str(n) + '_' + str(g) + '_' + str(3) + '.txt'
                expert_data_dict[path_key] = {
                    'state': [],
                    'action': [],
                    'goal': []
                }

                delta = g + 4
                action = Action(delta)

                while True:
                    new_state = T(state, action, 0)
                    if new_state.coordinates == state.coordinates:
                        break
                    expert_data_dict[path_key]['state'].append(state.state)
                    expert_data_dict[path_key]['action'].append(action.delta)
                    expert_data_dict[path_key]['goal'].append(g)
                    state = new_state

                if T(state, Action(2), 0).coordinates == state.coordinates \
                    or T(state, Action(3), 0).coordinates == state.coordinates:

                    delta = 0 if g < 2 else 1
                    action = Action(delta)

                    while state.state[1] != grid_height - 1 and state.state[
                            1] != 0:
                        expert_data_dict[path_key]['state'].append(state.state)
                        expert_data_dict[path_key]['action'].append(
                            action.delta)
                        expert_data_dict[path_key]['goal'].append(g)
                        state = T(state, action, 0)

                    delta = 3 if g == 0 or g == 3 else 2
                    action = Action(delta)

                    while state.state[0] != grid_width - 1 and state.state[
                            0] != 0:
                        expert_data_dict[path_key]['state'].append(state.state)
                        expert_data_dict[path_key]['action'].append(
                            action.delta)
                        expert_data_dict[path_key]['goal'].append(g)
                        state = T(state, action, 0)

                else:

                    delta = 3 if g == 0 or g == 3 else 2
                    action = Action(delta)

                    while state.state[0] != grid_width - 1 and state.state[
                            0] != 0:
                        expert_data_dict[path_key]['state'].append(state.state)
                        expert_data_dict[path_key]['action'].append(
                            action.delta)
                        expert_data_dict[path_key]['goal'].append(g)
                        state = T(state, action, 0)

                    delta = 0 if g < 2 else 1
                    action = Action(delta)

                    while state.state[1] != grid_height - 1 and state.state[
                            1] != 0:
                        expert_data_dict[path_key]['state'].append(state.state)
                        expert_data_dict[path_key]['action'].append(
                            action.delta)
                        expert_data_dict[path_key]['goal'].append(g)
                        state = T(state, action, 0)

                assert (state.coordinates in goals)

    return env_data_dict, expert_data_dict, obstacles, set_diff

示例#5

0

显示文件

parser.add_argument('--clip-epsilon', type=float, default=0.2, metavar='N',
                    help='Clipping for PPO grad')
parser.add_argument('--checkpoint', type=str, required=True,
                    help='path to checkpoint')

args = parser.parse_args()


#-----Environment-----#
width = height = 12
obstacles = create_obstacles(width, height)
set_diff = list(set(product(tuple(range(3, width-3)), repeat=2)) - set(obstacles))
start_loc = sample_start(set_diff)

s = State(start_loc, obstacles)
T = TransitionFunction(width, height, obstacle_movement)

if args.expert_path == 'SR2_expert_trajectories/':
    R = RewardFunction_SR2(-1.0,1.0,width)
else:
    R = RewardFunction(-1.0,1.0)

num_inputs = s.state.shape[0]
num_actions = 4
if args.expert_path == 'SR2_expert_trajectories/':
    num_c = 2
else:
    num_c = 4

#env.seed(args.seed)
torch.manual_seed(args.seed)

示例#6

0

显示文件

 def create_environment(self):
     self.width, self.height = 21, 21
     self.transition_func = TransitionFunction(self.width, self.height,
                                               obstacle_movement)