def _fit_exponential_poisson_model(
    self, point: ReachPoint, Imin: float = None
) -> Tuple[float, KInflatedGammaPoissonDistribution]:
    """Fits an exponential-poisson model to a single reach point.

    The fit is guaranteed to reproduce the 1+ reach of the input point.
    When the point carries additional frequencies, the fit should match
    them reasonably well too, but an exact match is not guaranteed.

    The values produced here are used to bootstrap the full k-Inflated
    Gamma Poisson model.

    Args:
      point: The ReachPoint to which the exponential-poisson model is fit.
      Imin: Optional lower bound on the number of inventory impressions
        that the returned model should have.  When the point has fewer
        impressions than this, Imin is used in their place.

    Returns:
      A pair (N, dist), where N is the estimated audience size and dist is
      a KInflatedGammaPoissonDistribution, constructed with arguments
      (1.0, beta, []), that represents the exponential-poisson fit.
    """
    impressions = point.impressions[0]
    if Imin is not None:
        # Never fit against fewer impressions than the requested minimum.
        impressions = max(impressions, Imin)
    reach = point.reach()

    if len(point.frequencies) <= 1:
        # Only the 1+ reach is available: derive N first, then beta from N.
        N = self._exponential_poisson_N(impressions, reach)
        beta = self._exponential_poisson_beta(impressions, N, reach)
    else:
        # Several frequencies are available: estimate the mean frequency
        # from impressions and reach, derive beta from it, then N from beta.
        mean_frequency = impressions / min(reach, impressions - 1)
        beta = (mean_frequency - 1) * 1.2  # 1.2 is arbitrary
        N = self._exponential_poisson_N_from_beta(impressions, reach, beta)

    # N * (beta + 1) is the maximum number of impressions the model can
    # deliver.  While it falls short of the impressions being fit, grow N
    # by 10% (to at least 1) and recompute beta for the enlarged audience.
    while N * (beta + 1) < impressions:
        N = max(1, 1.1 * N)
        beta = self._exponential_poisson_beta(impressions, N, reach)

    return N, KInflatedGammaPoissonDistribution(1.0, beta, [])
def test_by_impressions(self, mock_gamma_poisson_model):
    """Checks the reach histogram predicted by GammaPoissonModel.by_impressions.

    The mock's return value is fixed, so the prediction for 10000
    impressions is compared against hard-coded expected reach values for
    frequencies 1 through 5.

    Args:
      mock_gamma_poisson_model: Mock supplied by a patch decorator that is
        outside this view (presumably the routine that fits the model
        parameters -- confirm against the decorator).
    """
    mock_gamma_poisson_model.return_value = (25000, 5.0, 2.0)
    # Imax = 25000, N = 10000, alpha = 5, beta = 2
    h_training = [8124, 5464, 3191, 1679, 815, 371, 159, 64, 23, 6, 0]
    training_point = ReachPoint([20000], h_training, [200.0])
    gpm = GammaPoissonModel([training_point], max_reach=10000)
    gpm._fit()

    predicted = gpm.by_impressions([10000], max_frequency=5)
    h_expected = np.array([9682, 8765, 7353, 5750, 4233])
    h_actual = np.array([int(predicted.reach(i)) for i in range(1, 6)])

    self.assertAlmostEqual(predicted.spends[0], 100.0)
    # Each frequency level must agree with the expected value to within a
    # small squared error relative to the predicted value.
    for i, (actual, expected) in enumerate(zip(h_actual, h_expected)):
        self.assertLess(
            (actual - expected) ** 2 / actual,
            0.1,
            f"Discrepancy found at position {i}. "
            f"Got {actual} Expected {expected}",
        )
def test_by_impressions(self, mock_fit_point):
    """Checks the reach histogram predicted by KInflatedGammaPoissonModel.by_impressions.

    The mock's return value is fixed, so the prediction for 10000
    impressions is compared against hard-coded expected reach values for
    frequencies 1 through 5.

    Args:
      mock_fit_point: Mock supplied by a patch decorator that is outside
        this view (presumably the per-point fitting routine -- confirm
        against the decorator).
    """
    # Mocked fit result: N = 10000 and a distribution built from (5.0, 2.0, []).
    # NOTE(review): presumably alpha = 5 and beta = 2 -- confirm against the
    # KInflatedGammaPoissonDistribution constructor.
    mock_fit_point.return_value = (
        10000,
        KInflatedGammaPoissonDistribution(5.0, 2.0, []),
    )
    h_training = [7412, 4233, 2014, 842, 320, 112, 37, 11, 2]
    training_point = ReachPoint([15000], h_training, [200.0])
    kgpm = KInflatedGammaPoissonModel([training_point])
    kgpm._fit()

    predicted = kgpm.by_impressions([10000], max_frequency=5)
    h_expected = np.array([6056, 2629, 925, 283, 78])
    h_actual = np.array([int(predicted.reach(i)) for i in range(1, 6)])

    self.assertAlmostEqual(predicted.spends[0], 133.0, delta=1)
    # Each frequency level must agree with the expected value to within a
    # small squared error relative to the predicted value.
    for i, (actual, expected) in enumerate(zip(h_actual, h_expected)):
        self.assertLess(
            (actual - expected) ** 2 / actual,
            0.1,
            f"Discrepancy found at position {i}. "
            f"Got {actual} Expected {expected}",
        )
def test_reach(self):
    """Verifies ReachPoint.reach for stored frequencies and an out-of-range one.

    For a point built from impressions [200, 300] and frequencies
    [100, 50], reach(1) and reach(2) must return 100 and 50, and
    reach(3) must raise ValueError.
    """
    reach_point = ReachPoint([200, 300], [100, 50])
    for frequency, expected_reach in ((1, 100), (2, 50)):
        self.assertEqual(reach_point.reach(frequency), expected_reach)
    # A frequency beyond those supplied at construction is rejected.
    with self.assertRaises(ValueError):
        reach_point.reach(3)