Exemplo n.º 1
0
    def optimal(self, epsilon=10**-9):
        """Build a per-score threshold table for the pig game.

        Runs ``markov.value_iteration`` over states indexed ``(i, j, k)`` and
        condenses its output into a two-dimensional table. The turn passes
        by swapping ``i`` and ``j`` (presumably the current player's and the
        opponent's banked scores, with ``k`` the running turn total).

        Args:
            epsilon: Forwarded unchanged to ``markov.value_iteration``.

        Returns:
            A ``goal`` x ``goal`` list of lists. ``result[i][j]`` is the
            smallest ``k`` in ``range(goal)`` whose value-iteration entry
            ``[i][j][k]`` is falsy, or ``goal`` when there is none.
        """
        goal = self.values['goal']

        def pwin(probabilities, i, j, k):
            """Return the winning chance stored for state ``(i, j, k)``.

            States where a score has already reached the goal never hit
            the table: they resolve to a certain win or a certain loss.
            """
            if i + k >= goal:
                return 1.0
            if j >= goal:
                return 0.0
            return probabilities[i][j][k]

        def action_probs(indexes, probabilities):
            """Pair each action label with its winning probability.

            ``True`` is the average over every die face; ``False`` banks
            ``k`` into ``i`` and hands the turn to the other player.
            """
            i, j, k = indexes
            losing_faces = self.data.get(['dice', 'wrong'])
            # Plain running addition on purpose: keeps the floating point
            # result independent of how a summing builtin might accumulate.
            total = 0.0
            for face in range(1, self.values['dice'] + 1):
                if face not in losing_faces:
                    # Safe face: the turn total grows and the turn goes on.
                    total += pwin(probabilities, i, j, k + face)
                else:
                    # Losing face: turn total is dropped, roles are swapped.
                    total += (1.0 - pwin(probabilities, j, i, 0))
            keep_rolling = total / self.values['dice']
            pass_turn = 1 - pwin(probabilities, j, i + k, 0)
            return [(True, keep_rolling), (False, pass_turn)]

        # One size callable per state dimension; each receives the indexes
        # chosen for the dimensions before it.
        dimensions = [
            lambda : goal,
            lambda i : goal,
            lambda i, j : goal - i,
        ]
        values = markov.value_iteration(dimensions, epsilon, action_probs, True)

        table = []
        for i in range(goal):
            row = []
            table.append(row)
            for j in range(goal):
                # NOTE(review): min() drains the whole generator, so every k
                # in range(goal) gets indexed -- confirm values[i][j] really
                # holds that many entries for i > 0.
                falsy_ks = (k for k in range(goal) if not values[i][j][k])
                try:
                    threshold = min(falsy_ks)
                except ValueError:
                    # No falsy entry at all for this pair of scores.
                    threshold = goal
                row.append(threshold)

        return table
Exemplo n.º 2
0
    def optimal(self, epsilon=10**-5, max_dice=50):
        """Run value iteration where each action is a number of dice.

        States are indexed ``(i, j)`` and the turn passes by swapping the
        two indexes. Every action ``k`` in ``1..max_dice`` is scored from
        the entries of ``dices.dice_probability(...)[k]`` plus the chance
        that at least one of the ``k`` dice shows a wrong face.

        Args:
            epsilon: Forwarded unchanged to ``markov.value_iteration``.
            max_dice: Largest number of dice considered as an action; also
                passed to ``dices.dice_probability``.

        Returns:
            Whatever ``markov.value_iteration`` returns, unmodified.
        """
        def pwin(probabilities, i, j):
            """Return the winning chance stored for state ``(i, j)``.

            A score at or past ``self.goal`` resolves to a certain win
            (first index) or a certain loss (second index).
            """
            if i >= self.goal:
                return 1.0
            if j >= self.goal:
                return 0.0
            return probabilities[i][j]

        # Indexed by dice count; each entry iterates as (result, prob) pairs
        # (presumably the score distribution when no wrong face shows up).
        dice_probs = dices.dice_probability(self.dice, max_dice, self.wrong)

        def action_probs(indexes, probabilities):
            """Pair each dice count with its winning probability."""
            i, j = indexes
            outcomes = []
            for k in range(1, max_dice + 1):
                safe_faces = self.dice - len(self.wrong)
                # Chance that at least one of the k dice shows a wrong face.
                # NOTE(review): relies on `/` being true division; with int
                # operands under Python 2 it would truncate -- confirm the
                # targeted interpreter version.
                bust = 1 - (safe_faces / self.dice) ** k
                # Busting scores nothing: the other player moves from (j, i).
                expected = bust * (1 - pwin(probabilities, j, i))
                for result, prob in dice_probs[k]:
                    expected += prob * (1 - pwin(probabilities, j, i + result))
                outcomes.append((k, expected))

            return outcomes

        # One size callable per state dimension.
        dimensions = [
            lambda : self.goal,
            lambda i : self.goal,
        ]
        return markov.value_iteration(dimensions, epsilon, action_probs, True)