    def solve_with_info(self):
        # Generate a scrambled cube together with the scramble moves that produced it.
        cube, gen_trajectory_act_list = self.generator.generate_cube_with_info(
            self.move_depth)
        trajectory = []

        self.gen_trajectory_act_list += gen_trajectory_act_list

        for _ in range(self.move_depth):
            action = self.get_action(
                torch.from_numpy(one_hot_code(cube)).to(self.device))
            cube(action)

            trajectory.append(action)
            self.act_occ_list[ACTIONS.index(action)] += 1

            if cube == SOLVED_CUBE:
                self.win_counter += 1
                # Count how often each action appears in winning trajectories.
                for act in trajectory:
                    self.win_act_occ_list[ACTIONS.index(act)] += 1
                break

    def solve(self):
        # Scramble a cube, then give the agent move_depth moves to solve it.
        cube = self.generator.generate_cube(self.move_depth)
        for _ in range(self.move_depth):
            cube(
                self.get_action(
                    torch.from_numpy(one_hot_code(cube)).to(self.device)))

            if cube == SOLVED_CUBE:
                self.win_counter += 1
                break
Example #3
    def max_mover_solve(self, number_of_tests=1000, modifier=10):
        # Like solve(), but allow up to move_depth * modifier moves per scramble.
        cube = self.generator.generate_cube(self.move_depth)
        for _ in range(self.move_depth * modifier):
            cube(
                self.get_action(
                    torch.from_numpy(one_hot_code(cube)).to(self.device)))

            if cube == SOLVED_CUBE:
                self.win_counter += 1
                break
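
The number_of_tests parameter is never referenced inside the body shown above, so presumably the caller repeats the routine itself. A minimal usage sketch, assuming a configured solver instance named agent (the name and the reset of win_counter are assumptions):

agent.win_counter = 0
for _ in range(1000):  # plays the role of number_of_tests
    agent.max_mover_solve(modifier=10)
print(f"Solved {agent.win_counter} of 1000 scrambles")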
Example #4
    def visual(self):
        # Step-by-step visualisation of a solve attempt in the terminal.
        cube = self.generator.generate_cube(self.move_depth)
        print(chr(27) + "[1J")  # ANSI escape sequence: clear the screen
        print(f"Shuffled cube at depth level {self.move_depth}:\n{repr(cube)}")
        time.sleep(2)
        print("Solving cube")
        for _ in range(self.move_depth):
            # Apply the predicted action and print the resulting cube state.
            print(
                repr(
                    cube(
                        self.get_action(
                            torch.from_numpy(one_hot_code(cube)).to(
                                self.device)))))
            time.sleep(1)
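            # In the training-loop fragment below, new_full_buffer presumably refills
            # the replay memory with scrambles up to replay_shuffle_range moves deep.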
            memory.new_full_buffer(replay_shuffle_range)

            while time < epoch_time:
                # REPLAY
                if replay_time > 0 or np.random.random() <= replay_chance:
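                    # Replay branch: always taken while replay_time > 0, otherwise
                    # taken with probability replay_chance.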

                    # Get random cube and the reverse of the actions that led to it
                    cube, reverse_actions = memory.generate_random_cube()
                    reverse_actions = reverse_actions[:len(reverse_actions) -
                                                      stop]
                    depth = len(reverse_actions)

                    if self.adam_optim is not None:

                        for i in range(depth):
                            input = torch.from_numpy(one_hot_code(cube)).to(
                                self.device)

                            act, table_online = self.get_best_act_array(
                                input, Network.Online)
                            val_online = table_online[act]

                            # TODO: convert input, network, act -> input, act, network
                            val_target = self.get_val(input, act,
                                                      Network.Target)
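                            # Double-DQN-style decoupling: the online network picks the
                            # greedy action, the target network re-scores that action.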

                            correct_act = reverse_actions[depth - i - 1]

                            reward, reward_vector = self.experience_reward(
                                ACTIONS[act], correct_act)
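                            # experience_reward presumably scores the chosen action against
                            # the known correct move (the reverse of the scramble), yielding
                            # a scalar reward and a per-action reward vector.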