Example #1
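These snippets look like tests from a BG/BB customer-lifetime-value library built on top of the uncertainties package. A minimal, assumed import block for running them; the project-local modules (gen, models/mod, compress_data and friends) are inferred from the snippets themselves, not confirmed:

import math
import timeit
import ctypes as ct

import numpy as np
import pandas as pd
from uncertainties import ufloat, correlation_matrix

# Hypothetical project-local imports implied by the code below:
# from <project> import generate_data as gen, models, models as mod
# from <project>.utils import compress_data, compress_session_session_before_conversion_data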
def test_BGBB_integration_in_models_with_uncertainties():
    T = 10
    size = 100
    params = {'alpha': 1.2, 'beta': 0.7, 'gamma': 0.6, 'delta': 2.7}

    data = gen.bgbb_model(T,
                          params['alpha'],
                          params['beta'],
                          params['gamma'],
                          params['delta'],
                          size=size)

    data = compress_data(data)

    model = models.BGBBModel()

    model.fit(data['frequency'],
              data['recency'],
              data['T'],
              bootstrap_size=10,
              N=data['N'],
              initial_params=list(params.values()))

    print("Generation params")
    print(params)

    print("Fitted params")
    print(model.params)
    print(model.params_C)

    print("Uncertain parameters")
    print(model.uparams)

    print("E[X(t)] as a function of t")
    for t in [0, 1, 10, 100, 1000, 10000]:
        uEx = model.expected_number_of_purchases_up_to_time(t)
        print(t, uEx)
        assert uEx.n >= -0.0001
        assert uEx.s >= -0.0001

    t = 10
    print("E[X(t) = n] as a function of n, t = " + str(t))
    tot_prob = 0.0
    for n in range(t + 1):
        prob = model.fitter.probability_of_n_purchases_up_to_time(t, n)
        print(n, prob)
        tot_prob += prob
        assert 1 >= prob >= 0

        uprob = model.probability_of_n_purchases_up_to_time(t, n)
        print(uprob)
        assert is_almost_equal(uprob.n, prob)

    assert math.fabs(tot_prob - 1.0) < 0.00001
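For reference, the .n and .s attributes asserted above are the nominal value and standard deviation that the uncertainties package attaches to every ufloat; first-order error propagation happens automatically. A minimal standalone sketch:

from uncertainties import ufloat

u = ufloat(2.0, 0.1)   # nominal value 2.0, standard deviation 0.1
v = u ** 2             # uncertainty propagates through arithmetic
print(u.n, u.s)        # 2.0 0.1
print(v.n, v.s)        # 4.0 0.4 (first order: |dv/du| * 0.1 = 4 * 0.1)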
Example #2
def test_BGBB_correlations_preserved():
    T = 10
    size = 100
    params = {'alpha': 1.2, 'beta': 0.7, 'gamma': 0.6, 'delta': 2.7}

    data = gen.bgbb_model(T,
                          params['alpha'],
                          params['beta'],
                          params['gamma'],
                          params['delta'],
                          size=size)

    data = compress_data(data)

    model = models.BGBBModel()

    model.fit(data['frequency'],
              data['recency'],
              data['T'],
              bootstrap_size=10,
              N=data['N'],
              initial_params=list(params.values()))

    print("Generation params")
    print(params)

    print("Fitted params")
    print(model.params)
    print(model.params_C)

    print("Uncertain parameters")
    print(model.uparams)

    assert is_almost_equal(
        correlation_matrix([model.uparams['alpha'],
                            model.uparams['alpha']])[0, 1], 1.0)
    assert 1.0 > correlation_matrix([
        model.uparams['alpha'] + ufloat(1, 1), model.uparams['alpha']
    ])[0, 1] > 0.0

    # stub of profile
    p1 = model.expected_number_of_purchases_up_to_time(1)
    p2 = model.expected_number_of_purchases_up_to_time(2)

    assert 1.0 > correlation_matrix([p1, p2])[0, 1] > 0.0
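The assertions above rely on uncertainties.correlation_matrix, which returns the correlation matrix of a sequence of (possibly correlated) ufloats. A self-contained sketch of the two cases being checked, perfect and partial correlation:

from uncertainties import ufloat, correlation_matrix

a = ufloat(1.0, 0.5)
b = a + ufloat(1.0, 1.0)          # partially correlated with a
c = 2 * a                         # deterministic function of a

m = correlation_matrix([a, c, b])
print(m[0, 1])                    # 1.0: perfectly correlated
print(m[0, 2])                    # strictly between 0.0 and 1.0 (~0.447 here)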
Example #3
def test_BGBB_likelihood_compressed(T, size):
    params = {'alpha': 0.56, 'beta': 1.17, 'gamma': 0.38, 'delta': 1.13}

    data = gen.bgbb_model(T,
                          params['alpha'],
                          params['beta'],
                          params['gamma'],
                          params['delta'],
                          size=size)

    data = compress_data(data)

    model = models.BGBBModel()

    # Marshal the compressed columns into fixed-length ctypes float arrays
    # for the C implementation of the likelihood.
    n = len(data)
    n_samples = ct.c_int(n)
    float_n_array = ct.c_float * n

    x_c = float_n_array(*data['frequency'])
    tx_c = float_n_array(*data['recency'])
    T_c = float_n_array(*data['T'])
    N_c = float_n_array(*data['N'])

    start_c = timeit.default_timer()

    likelihood_c = model.fitter._c_negative_log_likelihood(
        list(params.values()), x_c, tx_c, T_c, N_c, n_samples)
    c_time = timeit.default_timer() - start_c
    print("C_time: " + str(c_time))
    print("Likelihood: " + str(likelihood_c))

    start_py = timeit.default_timer()
    likelihood_py = model.fitter._negative_log_likelihood(list(params.values()),
                                                          data['frequency'],
                                                          data['recency'],
                                                          data['T'],
                                                          penalizer_coef=0,
                                                          N=data['N'])
    py_time = timeit.default_timer() - start_py
    print("Py_time: " + str(py_time))
    print("Likelihood: " + str(likelihood_py))
Example #4
def test_BGBBBGExt_fitting_on_simulated_quite_real_looking_data():
    T = 10
    T_lagged = 0
    T0 = 52

    sizes_installs = [500, 1000, 5000, 10000, 25000, 50000, 100000]
    n_sim = 10
    iterative_fitting = 0
    penalizer_coef = 0.1

    success_fit_ratio = {}
    success_fit = {}
    arpd_estimates = {}
    lifetime_estimates = {}
    conversion_estimates = {}
    appd_estimates = {}
    apppu_estimates = {}
    arppu_estimates = {}

    for size_installs in sizes_installs:

        success_fit[size_installs] = 0
        arpd_estimates[size_installs] = []
        lifetime_estimates[size_installs] = []
        conversion_estimates[size_installs] = []
        appd_estimates[size_installs] = []
        apppu_estimates[size_installs] = []
        arppu_estimates[size_installs] = []

        for n in range(n_sim):

            size_purchasers = size_installs // 20

            a, b, g, d, e, z, c0 = 1.13, 0.32, 0.63, 3.2, 0.05, 6.78, 0.04
            a2, b2, g2, d2 = 0.33, 1.75, 1.88, 7.98

            params_conversion = {'alpha': a, 'beta': b, 'gamma': g, 'delta': d, 'epsilon': e, 'zeta': z, 'c0': c0}
            params_arppu = {'alpha': a2, 'beta': b2, 'gamma': g2, 'delta': d2}

            n_cohorts = T - T_lagged + 1

            data_conversion = gen.bgbbbgext_model(T, params_conversion['alpha'], params_conversion['beta'],
                                                  params_conversion['gamma'], params_conversion['delta'],
                                                  params_conversion['epsilon'],
                                                  params_conversion['zeta'], params_conversion['c0'],
                                                  size=size_installs // n_cohorts,
                                                  time_first_purchase=True)

            data_arppu = gen.bgbb_model(T, params_arppu['alpha'], params_arppu['beta'],
                                        params_arppu['gamma'], params_arppu['delta'],
                                        size=size_purchasers // n_cohorts)

            for Ti in range(T_lagged, T):
                data_conversion_new = gen.bgbbbgext_model(Ti, params_conversion['alpha'], params_conversion['beta'],
                                                          params_conversion['gamma'], params_conversion['delta'],
                                                          params_conversion['epsilon'],
                                                          params_conversion['zeta'], params_conversion['c0'],
                                                          size=size_installs // n_cohorts,
                                                          time_first_purchase=True)
                data_arppu_new = gen.bgbb_model(Ti, params_arppu['alpha'], params_arppu['beta'],
                                                params_arppu['gamma'], params_arppu['delta'],
                                                size=size_purchasers // n_cohorts)
                data_conversion = pd.concat([data_conversion, data_conversion_new])
                data_arppu = pd.concat([data_arppu, data_arppu_new])

            mv_values = gen.sample_monetary_values(size_purchasers)

            compressed_data_conversion = compress_session_session_before_conversion_data(data_conversion)
            compressed_data_arppu = compress_data(data_arppu)

            model_conversion = mod.BGBBBGExtModel(penalizer_coef)
            model_conversion.fit(frequency=compressed_data_conversion['frequency'],
                                 recency=compressed_data_conversion['recency'], T=compressed_data_conversion['T'],
                                 frequency_before_conversion=compressed_data_conversion['frequency_before_conversion'],
                                 N=compressed_data_conversion['N'], initial_params=list(params_conversion.values()),
                                 iterative_fitting=iterative_fitting)

            model_arppu = mod.BGBBModel(penalizer_coef)
            model_arppu.fit(frequency=compressed_data_arppu['frequency'], recency=compressed_data_arppu['recency'],
                            T=compressed_data_arppu['T'],
                            N=compressed_data_arppu['N'], initial_params=list(params_arppu.values()),
                            iterative_fitting=iterative_fitting)

            mv = ufloat(np.mean(mv_values), np.std(mv_values) / math.sqrt(len(mv_values)))
            # Assumption: true_mv (used in the true_* columns below) is not defined in
            # this snippet; we stand in the sampled nominal value for the generator's
            # true mean monetary value.
            true_mv = mv.nominal_value

            print("Conversion parameters")
            print(params_conversion)
            print(model_conversion.params)
            print(model_conversion.uparams)

            print("Arppu parameters")
            print(params_arppu)
            print(model_arppu.params)
            print(model_arppu.uparams)

            print("Monetary values")
            print(mv)

            ts = list(range(T0))
            lifetime = [ufloat_to_tuple(model_conversion.expected_number_of_sessions_up_to_time(t)) for t in ts]
            conversion_diff = [ufloat_to_tuple(model_conversion.expected_probability_of_converting_at_time(t)) for t in ts]
            conversion = [ufloat_to_tuple(model_conversion.expected_probability_of_converting_within_time(t)) for t in ts]
            apppu = [ufloat_to_tuple(model_arppu.expected_number_of_purchases_up_to_time(t)) for t in ts]
            arppu = [ufloat_to_tuple((1.0 + apppu[i][0]) * mv) for i in range(len(apppu))]
            appd = [ufloat_to_tuple(get_arpd_retention(model_conversion, model_arppu, t)) for t in ts]
            arpd = [ufloat_to_tuple(appd[i][0] * mv) for i in range(len(appd))]

            print(ts)
            print(lifetime)
            print(conversion_diff)
            print(conversion)
            print(apppu)
            print(arppu)
            print(appd)
            print(arpd)

            summary_df = pd.DataFrame({
                'lifetime': [v[0] + 1 for v in lifetime],
                'lifetime_err': [v[1] for v in lifetime],
                'conversion_diff': [v[0] for v in conversion_diff],
                'conversion_diff_err': [v[1] for v in conversion_diff],
                'conversion': [v[0] for v in conversion],
                'conversion_err': [v[1] for v in conversion],
                'apppu': [v[0] + 1 for v in apppu],
                'apppu_err': [v[1] for v in apppu],
                'arppu': [v[0] for v in arppu],
                'arppu_err': [v[1] for v in arppu],
                'appd': [v[0] for v in appd],
                'appd_err': [v[1] for v in appd],
                'arpd': [v[0] for v in arpd],
                'arpd_err': [v[1] for v in arpd],
                'true_lifetime': [get_true_lifetime(a, b, g, d, t) for t in ts],
                'true_conversion': [get_true_conversion(a, b, g, d, e, z, c0, t) for t in ts],
                'true_apppu': [get_true_apppu(a2, b2, g2, d2, t) for t in ts],
                'true_arppu': [get_true_arppu(a2, b2, g2, d2, true_mv, t) for t in ts],
                'true_appd': [get_true_appd(a, b, g, d, e, z, c0, a2, b2, g2, d2, t) for t in ts],
                'true_arpd': [get_true_arpd(a, b, g, d, e, z, c0, a2, b2, g2, d2, true_mv, t) for t in ts],
            })

            with open("/Users/marcomeneghelli/Desktop/arpd_simulations/" + str(size_installs) + "/pars_simdata_" + str(
                    size_installs) + "_" + str(
                T) + "iterative_fitting" + str(iterative_fitting) + "_" + str(n) + ".txt", "w") as text_file:
                text_file.write(str(model_conversion.params))
                text_file.write(str(model_arppu.params))
                text_file.write(str((mv)))

            summary_df.to_csv(
                "/Users/marcomeneghelli/Desktop/arpd_simulations/" + str(size_installs) + "/arpd_simdata_" + str(
                    size_installs) + "_" + str(
                    T) + "iterative_fitting" + str(iterative_fitting) + "_" + str(n) + ".csv")

            last_arpd = arpd[-1][0]
            last_lifetime = lifetime[-1][0]
            last_conversion = conversion[-1][0]
            last_appd = appd[-1][0]
            last_apppu = apppu[-1][0]
            last_arppu = arppu[-1][0]

            if last_arpd is not None and not math.isnan(last_arpd):
                success_fit[size_installs] += 1

            arpd_estimates[size_installs].append(last_arpd)
            lifetime_estimates[size_installs].append(last_lifetime)
            conversion_estimates[size_installs].append(last_conversion)
            appd_estimates[size_installs].append(last_appd)
            apppu_estimates[size_installs].append(last_apppu)
            arppu_estimates[size_installs].append(last_arppu)

        success_fit_ratio[size_installs] = float(success_fit[size_installs]) / n_sim

        summary_size_installs_df = pd.DataFrame({
            'success_fit_ratio': success_fit_ratio[size_installs],
            'arpd_estimates': arpd_estimates[size_installs],
            'lifetime_estimates': lifetime_estimates[size_installs],
            'conversion_estimates': conversion_estimates[size_installs],
            'appd_estimates': appd_estimates[size_installs],
            'apppu_estimates': apppu_estimates[size_installs],
            'arppu_estimates': arppu_estimates[size_installs],
        })

        summary_size_installs_df.to_csv(
            "/Users/marcomeneghelli/Desktop/arpd_simulations/"
            f"arpd_simdata_last_measurements_{size_installs}_{T}iterative_fitting{iterative_fitting}.csv")