def main(args):
    """Generate ``args.count`` random GridWorld mazes and pickle each to ``args.output_dir``.

    Expected attributes on ``args``: output_dir, count, min_size, max_size,
    wall_count, start_count, goal_count, trap_count.
    Relies on ``GridWorld`` and ``check_connectivity`` defined elsewhere in the project.
    """
    os.makedirs(args.output_dir, exist_ok=True)
    for k in tqdm.trange(args.count):
        g = GridWorld(args.max_size, args.max_size)
        # Draw an odd playable size between roughly min_size and max_size.
        # NOTE(review): when args.max_size is even, this can produce
        # size == max_size + 1, which skips the padding branch below and lets
        # wall coordinates exceed the grid — confirm callers always pass an
        # odd max_size.
        size = 2 * random.randint(args.min_size // 2, args.max_size // 2) + 1
        if size < args.max_size:
            # Center the playable area: fill a border of width `pad` on all
            # four sides (axis convention per GridWorld._fill_rect — private
            # API; coordinates appear to be 1-based).
            pad = (args.max_size - size) // 2
            g._fill_rect(1, 1, pad, args.max_size)
            g._fill_rect(args.max_size - pad + 1, 1, args.max_size, args.max_size)
            g._fill_rect(1, 1, args.max_size, pad)
            g._fill_rect(1, args.max_size - pad + 1, args.max_size, args.max_size)
        else:
            pad = 0
        # Scatter a random number of straight walls inside the playable area;
        # all coordinates are offset by `pad` into the centered sub-grid.
        wall_count = random.randint(1, args.wall_count)
        for _ in range(wall_count):
            is_vert = random.random() > 0.5
            wall_coord = random.randint(2, size - 1)
            wall_len = random.randint(2, size - 2)
            wall_start = random.randint(1, size - wall_len)
            if is_vert:
                g.add_vertical_wall(pad + wall_coord, pad + wall_start,
                                    pad + wall_start + wall_len - 1)
            else:
                g.add_horizontal_wall(pad + wall_coord, pad + wall_start,
                                      pad + wall_start + wall_len - 1)
        # Draw distinct free cells (shuffled, then popped) for starts, goals
        # and traps so no two markers share a cell.
        # NOTE(review): pop() assumes the connected component holds at least
        # start_count + goal_count + trap_count cells; otherwise this raises
        # IndexError — confirm check_connectivity guarantees enough cells.
        connected_component = list(check_connectivity(g))
        random.shuffle(connected_component)
        start_count = random.randint(1, args.start_count)
        for _ in range(start_count):
            g.add_start(*connected_component.pop())
        goal_count = random.randint(1, args.goal_count)
        for _ in range(goal_count):
            g.add_goal(*connected_component.pop())
        trap_count = random.randint(0, args.trap_count)
        for _ in range(trap_count):
            g.add_trap(*connected_component.pop())
        # Filename encodes index, playable size and the sampled counts.
        g.save(
            os.path.join(
                args.output_dir,
                "grid{0:03d}_{1}x{1}_w{2}_s{3}_g{4}_t{5}.pkl".format(
                    k, size, wall_count, start_count, goal_count, trap_count)))
)  # first layer of the network (number of input neurons*, number of output neurons*) *of the layer
        # NOTE(review): the nn.Linear( call this `)` closes — and the whole
        # start of __init__ — lies outside this chunk.
        self.linear_hidden_to_out = nn.Linear(size_inter, size_out)  # output layer

    def forward(self, x):
        # Forward pass: linear -> ReLU -> linear; returns raw (unactivated)
        # outputs of the final layer.
        # give the function to apply to move on to the next step
        h = self.linear_in_to_hidden(x)
        h = nn.functional.relu(
            h)  # non-linearity (activation function)
        return self.linear_hidden_to_out(h)


if __name__ == "__main__":
    # Q-learning setup on a small 4x4 GridWorld (training loop presumably
    # continues past this chunk).
    g = GridWorld(4, 4)
    g.add_start(1, 1)
    g.add_goal(2, 2)
    n_s = g.state_space_size
    n_a = g.action_space_size
    s = g.reset()
    discount = 0.9    # reward discount factor
    n_it = 100        # number of training iterations
    epsilon = 0.01    # exploration rate
    max_t = 500       # max steps per episode
    lr = 0.1
    # NOTE(review): `lr = 0.1` is set here but the optimizer below is built
    # with lr=0.01 — confirm which learning rate is intended.
    n_size_dataset = 100  # number of samples added to the dataset at each iteration
    Q = MyMLP(1, 32, 4)   # online Q-network: 1 input, 32 hidden, 4 action values
    optim = torch.optim.SGD(Q.parameters(), lr=0.01)
    Q_prime = MyMLP(1, 32, 4)  # second network — presumably a target network; verify in the code that follows