Пример #1
0
def _mask_border(grid, max_size, pad):
    """Call ``grid._fill_rect`` on four bands of thickness *pad*.

    The bands are issued in the order left, right, top, bottom, assuming
    ``_fill_rect`` takes inclusive ``(x0, y0, x1, y1)`` corners
    (TODO confirm against GridWorld).
    """
    far = max_size - pad + 1
    grid._fill_rect(1, 1, pad, max_size)
    grid._fill_rect(far, 1, max_size, max_size)
    grid._fill_rect(1, 1, max_size, pad)
    grid._fill_rect(1, far, max_size, max_size)


def _add_random_walls(grid, size, pad, count):
    """Add *count* random straight walls inside the ``size`` area shifted by *pad*.

    Requires ``size >= 4``; otherwise ``random.randint`` is called with an
    empty range and raises ``ValueError``.
    """
    for _ in range(count):
        # Keep the draw order (orientation, coordinate, length, start) so a
        # seeded run produces the same grids as before.
        is_vert = random.random() > 0.5
        coord = random.randint(2, size - 1)
        length = random.randint(2, size - 2)
        first = random.randint(1, size - length)
        last = first + length - 1
        add_wall = grid.add_vertical_wall if is_vert else grid.add_horizontal_wall
        add_wall(pad + coord, pad + first, pad + last)


def main(args):
    """Generate ``args.count`` random grid worlds and save them to disk.

    For every grid a random odd ``size`` is drawn, the unused margin of the
    ``args.max_size`` x ``args.max_size`` world is filled, random walls are
    added, and start/goal/trap cells are taken from the shuffled result of
    ``check_connectivity`` (presumably the mutually reachable free cells --
    confirm against its definition).

    Args:
        args: Namespace providing ``output_dir``, ``count``, ``min_size``,
            ``max_size``, ``wall_count``, ``start_count``, ``goal_count``
            and ``trap_count``.

    Raises:
        ValueError: If ``args.wall_count``, ``args.start_count`` or
            ``args.goal_count`` is below 1, or ``args.trap_count`` is below 0
            (empty ``random.randint`` range).
        IndexError: If ``check_connectivity`` yields fewer cells than the
            number of starts, goals and traps drawn.
    """
    os.makedirs(args.output_dir, exist_ok=True)

    for k in tqdm.trange(args.count):
        g = GridWorld(args.max_size, args.max_size)
        # NOTE(review): when args.max_size is even, the largest draw gives
        # size == args.max_size + 1 -- confirm whether that is intended.
        size = 2 * random.randint(args.min_size // 2, args.max_size // 2) + 1
        if size < args.max_size:
            pad = (args.max_size - size) // 2
            _mask_border(g, args.max_size, pad)
        else:
            pad = 0

        if size >= 4:
            wall_count = random.randint(1, args.wall_count)
            _add_random_walls(g, size, pad, wall_count)
        else:
            # A wall needs random.randint(2, size - 2), which is an empty
            # range for size < 4; leave such tiny grids without walls
            # instead of crashing with ValueError.
            wall_count = 0

        connected_component = list(check_connectivity(g))
        random.shuffle(connected_component)

        start_count = random.randint(1, args.start_count)
        for _ in range(start_count):
            g.add_start(*connected_component.pop())

        goal_count = random.randint(1, args.goal_count)
        for _ in range(goal_count):
            g.add_goal(*connected_component.pop())

        trap_count = random.randint(0, args.trap_count)
        for _ in range(trap_count):
            g.add_trap(*connected_component.pop())

        # The file name records the index, size and the counts actually used.
        file_name = "grid{0:03d}_{1}x{1}_w{2}_s{3}_g{4}_t{5}.pkl".format(
            k, size, wall_count, start_count, goal_count, trap_count)
        g.save(os.path.join(args.output_dir, file_name))
Пример #2
0
        )  #premiere couche du reseau de neurones (nbres de neurones en entrre*, nbre de neurones en sortie*) *du layer
        self.linear_hidden_to_out = nn.Linear(size_inter,
                                              size_out)  #couche de sortie

    def forward(self, x):
        """Compute the network output for input ``x``.

        Applies the input-to-hidden linear layer, a ReLU non-linearity
        (the activation function), then the hidden-to-output linear layer.
        """
        hidden = nn.functional.relu(self.linear_in_to_hidden(x))
        return self.linear_hidden_to_out(hidden)


if __name__ == "__main__":
    # Q learning
    # Build a GridWorld(4, 4) with a start at (1, 1) and a goal at (2, 2).
    g = GridWorld(4, 4)
    g.add_start(1, 1)
    g.add_goal(2, 2)
    # Sizes of the state and action spaces as reported by the environment.
    n_s = g.state_space_size
    n_a = g.action_space_size
    # Reset the environment and keep whatever it returns as the current state.
    s = g.reset()
    # Hyper-parameters; presumably consumed by a training loop further down
    # (not visible in this excerpt -- confirm).
    discount = 0.9
    n_it = 100
    epsilon = 0.01
    max_t = 500
    # NOTE(review): lr is set to 0.1 here, but the SGD optimizer below is
    # created with a literal lr=0.01 -- confirm which value is intended.
    lr = 0.1
    n_size_dataset = 100  # number of samples added to the dataset at each iteration

    # Q network; the arguments are presumably (input size, hidden size,
    # output size) -- confirm against MyMLP.__init__.
    Q = MyMLP(1, 32, 4)
    optim = torch.optim.SGD(Q.parameters(), lr=0.01)

    # Second network built with the same arguments as Q (presumably a
    # target network -- confirm against the code that follows).
    Q_prime = MyMLP(1, 32, 4)