Пример #1
0
def test_beta_geometric_nbd_model_transactional_data(T, r, alpha, a, b,
                                                     observation_period_end,
                                                     freq, size):
    """Compare summarised simulated transactions against simulated summary data.

    The random seed is reset to the same value before each of the two
    generator calls. The transaction-level draw is passed through
    ``summary_data_from_transaction_data`` and must equal the
    ``frequency`` / ``recency`` / ``T`` columns of the direct model draw,
    after the latter's recency has been rounded up with ``np.ceil``.
    """
    seed = 188898
    # Arguments shared by both generator calls.
    model_kwargs = dict(T=T, r=r, alpha=alpha, a=a, b=b, size=size)

    # Draw 1: transaction-level data, then summarise it.
    np.random.seed(seed)
    transactions = beta_geometric_nbd_model_transactional_data(
        observation_period_end=observation_period_end,
        freq=freq,
        **model_kwargs)
    summarised = summary_data_from_transaction_data(
        transactions=transactions,
        customer_id_col="customer_id",
        datetime_col="date",
        observation_period_end=observation_period_end,
        freq=freq,
    )

    # Draw 2: same seed, summary-level data straight from the model.
    np.random.seed(seed)
    compared_columns = ["frequency", "recency", "T"]
    simulated = beta_geometric_nbd_model(**model_kwargs)[compared_columns]
    simulated["recency"] = simulated["recency"].apply(np.ceil)

    # Drop both indexes so the comparison does not depend on index labels.
    simulated = simulated.reset_index(drop=True)
    summarised = summarised.reset_index(drop=True)
    assert simulated.equals(summarised)
Пример #2
0
    def fit(self,
            frequency,
            recency,
            T,
            iterative_fitting=1,
            initial_params=None,
            verbose=False,
            tol=1e-4,
            index=None):
        """
        Fit the BG/NBD model to the supplied customer data.

        Parameters:
            frequency: the frequency vector of customers' purchases (denoted x in literature).
            recency: the recency vector of customers' purchases (denoted t_x in literature).
            T: the vector of customers' age (time since first purchase)
            iterative_fitting: perform iterative_fitting fits over random/warm-started initial params
            initial_params: set the initial parameters for the fitter.
            verbose: set to true to print out convergence diagnostics.
            tol: tolerance value handed unchanged to the fitting routine
                (presumably the optimizer's convergence tolerance).
            index: optional index assigned to the DataFrame stored in self.data

        Returns:
            self, with params_, data, generate_new_data and predict set

        """
        freq_arr = asarray(frequency)
        rec_arr = asarray(recency)
        age_arr = asarray(T)
        _check_inputs(freq_arr, rec_arr, age_arr)

        # Both time-valued vectors are multiplied by the same factor before fitting.
        scale = _scale_time(age_arr)
        self._scale = scale
        rec_scaled = rec_arr * scale
        age_scaled = age_arr * scale

        likelihood_args = [freq_arr, rec_scaled, age_scaled, self.penalizer_coef]
        params, nll = _fit(self._negative_log_likelihood,
                           likelihood_args,
                           iterative_fitting,
                           initial_params,
                           4,
                           verbose,
                           tol)
        self._negative_log_likelihood_ = nll

        param_names = ['r', 'alpha', 'a', 'b']
        self.params_ = OrderedDict(zip(param_names, params))
        # alpha was estimated on the rescaled inputs; divide the factor back out.
        self.params_['alpha'] /= self._scale

        # The stored frame holds the original (unscaled) inputs.
        column_names = ['frequency', 'recency', 'T']
        self.data = DataFrame(vconcat[freq_arr, rec_arr, age_arr],
                              columns=column_names)
        if index is not None:
            self.data.index = index

        # Parameters are looked up at call time, not captured at fit time.
        self.generate_new_data = lambda size=1: beta_geometric_nbd_model(
            age_arr, *self._unload_params('r', 'alpha', 'a', 'b'), size=size)

        self.predict = self.conditional_expected_number_of_purchases_up_to_time
        return self
Пример #3
0
    def fit(self, frequency, recency, T, iterative_fitting=1, initial_params=None, verbose=False):
        """
        Fit the BG/NBD model to the supplied customer data.

        Parameters:
            frequency: the frequency vector of customers' purchases (denoted x in literature).
            recency: the recency vector of customers' purchases (denoted t_x in literature).
            T: the vector of customers' age (time since first purchase)
            iterative_fitting: perform `iterative_fitting` additional fits to find the best
                parameters for the model. Setting to 0 will improve performance but possibly
                hurt estimates.
            initial_params: set the initial parameters for the fitter.
            verbose: set to true to print out convergence diagnostics.

        Returns:
            self, with params_, data, generate_new_data and predict set

        """
        freq_arr = asarray(frequency)
        rec_arr = asarray(recency)
        age_arr = asarray(T)
        _check_inputs(freq_arr, rec_arr, age_arr)

        # Both time-valued vectors are multiplied by the same factor before fitting.
        scale = _scale_time(age_arr)
        self._scale = scale
        rec_scaled = rec_arr * scale
        age_scaled = age_arr * scale

        params, nll = _fit(
            self._negative_log_likelihood,
            freq_arr,
            rec_scaled,
            age_scaled,
            iterative_fitting,
            self.penalizer_coef,
            initial_params,
            verbose,
        )
        self._negative_log_likelihood_ = nll

        param_names = ["r", "alpha", "a", "b"]
        self.params_ = OrderedDict(zip(param_names, params))
        # alpha was estimated on the rescaled inputs; divide the factor back out.
        self.params_["alpha"] /= self._scale

        # The stored frame holds the original (unscaled) inputs.
        column_names = ["frequency", "recency", "T"]
        self.data = DataFrame(vconcat[freq_arr, rec_arr, age_arr], columns=column_names)

        # Parameters are looked up at call time, not captured at fit time.
        self.generate_new_data = lambda size=1: beta_geometric_nbd_model(
            age_arr, *self._unload_params("r", "alpha", "a", "b"), size=size
        )

        self.predict = self.conditional_expected_number_of_purchases_up_to_time
        return self
Пример #4
0
    def fit(self,
            frequency,
            recency,
            T,
            iterative_fitting=1,
            initial_params=None,
            verbose=False):
        """
        Estimate the BG/NBD model parameters from customer summary vectors.

        Parameters:
            frequency: the frequency vector of customers' purchases (denoted x in literature).
            recency: the recency vector of customers' purchases (denoted t_x in literature).
            T: the vector of customers' age (time since first purchase)
            iterative_fitting: perform `iterative_fitting` additional fits to find the best
                parameters for the model. Setting to 0 will improve performance but possibly
                hurt estimates.
            initial_params: set the initial parameters for the fitter.
            verbose: set to true to print out convergence diagnostics.

        Returns:
            self, with params_, data, generate_new_data and predict set

        """
        x = asarray(frequency)
        t_x = asarray(recency)
        ages = asarray(T)
        _check_inputs(x, t_x, ages)

        # The optimiser sees recency and T multiplied by a common factor.
        factor = _scale_time(ages)
        self._scale = factor
        t_x_scaled = t_x * factor
        ages_scaled = ages * factor

        fit_output = _fit(
            self._negative_log_likelihood, x, t_x_scaled, ages_scaled,
            iterative_fitting, self.penalizer_coef, initial_params, verbose)
        params, self._negative_log_likelihood_ = fit_output

        self.params_ = OrderedDict(zip(['r', 'alpha', 'a', 'b'], params))
        # Divide the scaling factor back out of alpha.
        self.params_['alpha'] /= self._scale

        # Keep the unscaled inputs as a three-column frame.
        stacked = vconcat[x, t_x, ages]
        self.data = DataFrame(stacked,
                              columns=['frequency', 'recency', 'T'])

        # Fitted parameters are read when the generator is invoked.
        self.generate_new_data = lambda size=1: beta_geometric_nbd_model(
            ages, *self._unload_params('r', 'alpha', 'a', 'b'), size=size)

        self.predict = self.conditional_expected_number_of_purchases_up_to_time
        return self
Пример #5
0
    def fit(self, frequency, recency, T, iterative_fitting=1, initial_params=None, verbose=False, tol=1e-4):
        """
        Estimate the BG/NBD model parameters from customer summary vectors.

        Parameters:
            frequency: the frequency vector of customers' purchases (denoted x in literature).
            recency: the recency vector of customers' purchases (denoted t_x in literature).
            T: the vector of customers' age (time since first purchase)
            iterative_fitting: perform iterative_fitting fits over random/warm-started initial params
            initial_params: set the initial parameters for the fitter.
            verbose: set to true to print out convergence diagnostics.
            tol: tolerance value handed unchanged to the fitting routine
                (presumably the optimizer's convergence tolerance).

        Returns:
            self, with params_, data, generate_new_data and predict set

        """
        x = asarray(frequency)
        t_x = asarray(recency)
        ages = asarray(T)
        _check_inputs(x, t_x, ages)

        # The optimiser sees recency and T multiplied by a common factor.
        factor = _scale_time(ages)
        self._scale = factor
        t_x_scaled = t_x * factor
        ages_scaled = ages * factor

        likelihood_args = [x, t_x_scaled, ages_scaled, self.penalizer_coef]
        fit_output = _fit(
            self._negative_log_likelihood,
            likelihood_args,
            iterative_fitting,
            initial_params,
            4,
            verbose,
            tol,
        )
        params, self._negative_log_likelihood_ = fit_output

        self.params_ = OrderedDict(zip(['r', 'alpha', 'a', 'b'], params))
        # Divide the scaling factor back out of alpha.
        self.params_['alpha'] /= self._scale

        # Keep the unscaled inputs as a three-column frame.
        stacked = vconcat[x, t_x, ages]
        self.data = DataFrame(stacked, columns=['frequency', 'recency', 'T'])

        # Fitted parameters are read when the generator is invoked.
        self.generate_new_data = lambda size=1: beta_geometric_nbd_model(
            ages, *self._unload_params('r', 'alpha', 'a', 'b'), size=size)

        self.predict = self.conditional_expected_number_of_purchases_up_to_time
        return self
Пример #6
0
    def fit(self,
            frequency,
            recency,
            T,
            iterative_fitting=0,
            initial_params=None):
        """
        Fit the BG/NBD model to the supplied customer data.

        Parameters:
            frequency: the frequency vector of customers' purchases (denoted x in literature).
            recency: the recency vector of customers' purchases (denoted t_x in literature).
            T: the vector of customers' age (time since first purchase)
            iterative_fitting: perform `iterative_fitting` additional fits to find the best
                parameters for the model. Setting to 0 will improve performance but possibly
                hurt estimates.
            initial_params: initial parameters, passed through to the fitting routine.

        Returns:
            self, with params_, data, generate_new_data and predict set

        """
        freq_arr = np.asarray(frequency)
        rec_arr = np.asarray(recency)
        age_arr = np.asarray(T)

        fit_output = _fit(
            self._negative_log_likelihood, freq_arr, rec_arr, age_arr,
            iterative_fitting, self.penalizer_coef, initial_params)
        params, self._negative_log_likelihood_ = fit_output

        param_names = ['r', 'alpha', 'a', 'b']
        self.params_ = dict(zip(param_names, params))

        # Store the inputs side by side as a three-column frame.
        stacked = np.c_[freq_arr, rec_arr, age_arr]
        self.data = pd.DataFrame(stacked,
                                 columns=['frequency', 'recency', 'T'])

        # The generator closes over the parameters found by this fit.
        self.generate_new_data = lambda size=1: beta_geometric_nbd_model(
            age_arr, *params, size=size)

        self.predict = self.conditional_expected_number_of_purchases_up_to_time
        return self