Exemplo n.º 1
0
    def fit(self, frequency, recency, T, iterative_fitting=1, initial_params=None, verbose=False):
        """
        This methods fits the data to the Pareto/NBD model.
        Parameters:
            frequency: the frequency vector of customers' purchases (denoted x in literature).
            recency: the recency vector of customers' purchases (denoted t_x in literature).
            T: the vector of customers' age (time since first purchase)
            iterative_fitting: perform `iterative_fitting` additional fits to find the best
                parameters for the model. Setting to 0 will improve performances but possibly
                hurt estimates.
            initial_params: set initial params for the iterative fitter.
            verbose: set to true to print out convergence diagnostics.
        Returns:
            self, with additional properties and methods like params_ and plot
        """
        frequency = asarray(frequency)
        recency = asarray(recency)
        T = asarray(T)
        _check_inputs(frequency, recency, T)

        params, self._negative_log_likelihood_ = _fit(self._negative_log_likelihood,
                                                      [frequency, recency, T, self.penalizer_coef],
                                                      iterative_fitting,
                                                      initial_params,
                                                      4,
                                                      verbose)

        self.params_ = OrderedDict(zip(['r', 'alpha', 's', 'beta'], params))
        self.data = DataFrame(vconcat[frequency, recency, T], columns=['frequency', 'recency', 'T'])
        self.generate_new_data = lambda size=1: pareto_nbd_model(T, *params, size=size)

        self.predict = self.conditional_expected_number_of_purchases_up_to_time
        return self
Exemplo n.º 2
0
    def fit(self, frequency, monetary_value, iterative_fitting=5, initial_params=None, verbose=False):
        """
        This methods fits the data to the Gamma/Gamma model.

        Parameters:
            frequency: the frequency vector of customers' purchases (denoted x in literature).
            monetary_value: the monetary value vector of customer's purchases (denoted m in literature).
            iterative_fitting: perform `iterative_fitting` additional fits to find the best
                parameters for the model. Setting to 0 will improve performances but possibly
                hurt estimates. This model is not very stable so we suggest >10 for best estimates evaluation.
            initial_params: set initial params for the iterative fitter.
            verbose: set to true to print out convergence diagnostics.

        Returns:
            self, fitted and with parameters estimated
        """
        params, self._negative_log_likelihood_ = _fit(self._negative_log_likelihood,
                                                      [frequency, monetary_value, self.penalizer_coef],
                                                      iterative_fitting,
                                                      initial_params,
                                                      3,
                                                      verbose)

        self.data = DataFrame(vconcat[frequency, monetary_value], columns=['frequency', 'monetary_value'])
        self.params_ = OrderedDict(zip(['p', 'q', 'v'], params))

        return self
Exemplo n.º 3
0
    def fit(self,
            frequency,
            monetary_value,
            iterative_fitting=5,
            initial_params=None,
            verbose=False):
        """
        This methods fits the data to the Gamma/Gamma model.

        Parameters:
            frequency: the frequency vector of customers' purchases (denoted x in literature).
            monetary_value: the monetary value vector of customer's purchases (denoted m in literature).
            iterative_fitting: perform `iterative_fitting` additional fits to find the best
                parameters for the model. Setting to 0 will improve performances but possibly
                hurt estimates. This model is not very stable so we suggest >10 for best estimates evaluation.
            initial_params: set initial params for the iterative fitter.
            verbose: set to true to print out convergence diagnostics.

        Returns:
            self, fitted and with parameters estimated
        """
        params, self._negative_log_likelihood_ = _fit(
            self._negative_log_likelihood,
            [frequency, monetary_value, self.penalizer_coef],
            iterative_fitting, initial_params, 3, verbose)

        self.data = DataFrame(vconcat[frequency, monetary_value],
                              columns=['frequency', 'monetary_value'])
        self.params_ = OrderedDict(zip(['p', 'q', 'v'], params))

        return self
Exemplo n.º 4
0
    def fit(self,
            frequency,
            monetary_value,
            iterative_fitting=4,
            initial_params=None,
            verbose=False,
            tol=1e-4):
        """
        This methods fits the data to the Gamma/Gamma model.

        Parameters:
            frequency: the frequency vector of customers' purchases (denoted x in literature).
            monetary_value: the monetary value vector of customer's purchases (denoted m in literature).
            iterative_fitting: perform iterative_fitting fits over random/warm-started initial params.
            initial_params: set initial params for the iterative fitter.
            verbose: set to true to print out convergence diagnostics.
            tol: tolerance for termination of the function minimization process.

        Returns:
            self, fitted and with parameters estimated
        """
        _check_inputs(frequency, monetary_value=monetary_value)

        params, self._negative_log_likelihood_ = _fit(
            self._negative_log_likelihood,
            [frequency, monetary_value, self.penalizer_coef],
            iterative_fitting, initial_params, 3, verbose, tol)

        self.data = DataFrame(vconcat[frequency, monetary_value],
                              columns=['frequency', 'monetary_value'])
        self.params_ = OrderedDict(zip(['p', 'q', 'v'], params))

        return self
Exemplo n.º 5
0
    def fit(self, frequency, recency, n, n_custs, verbose=False):
        """
        Fit the BG/BB model.

        Parameters:
            frequency: Total periods with observed transactions
            recency: Period of most recent transaction
            n: Number of transaction opportunities
            n_custs: Number of customers with given frequency/recency/T. Fader and Hardie condense
                    the individual RFM matrix into all observed combinations of frequency/recency/T. This
                    parameter represents the count of customers with a given purchase pattern. Instead of
                    calculating individual loglikelihood, the loglikelihood is calculated for each pattern and
                    multiplied by the number of customers with that pattern.

        Returns: self

        """

        frequency = asarray(frequency)
        recency = asarray(recency)
        n = asarray(n)
        n_custs = asarray(n_custs)
        _check_inputs(frequency, recency, n)

        params, self._negative_log_likelihood_ = _fit(
            self._negative_log_likelihood,
            [frequency, recency, n, n_custs, self.penalizer_coef], 0,
            np.ones(4), 4, verbose)
        self.params_ = OrderedDict(
            zip(['alpha', 'beta', 'gamma', 'delta'], params))
        self.data = DataFrame(vconcat[frequency, recency, n, n_custs],
                              columns=['frequency', 'recency', 'n', 'n_custs'])

        return self
Exemplo n.º 6
0
    def fit(self, frequency, recency, n, n_custs, verbose=False):
        """
        Fit the BG/BB model.

        Parameters:
            frequency: Total periods with observed transactions
            recency: Period of most recent transaction
            n: Number of transaction opportunities
            n_custs: Number of customers with given frequency/recency/T. Fader and Hardie condense
                    the individual RFM matrix into all observed combinations of frequency/recency/T. This
                    parameter represents the count of customers with a given purchase pattern. Instead of
                    calculating individual loglikelihood, the loglikelihood is calculated for each pattern and
                    multiplied by the number of customers with that pattern.

        Returns: self

        """

        frequency = asarray(frequency)
        recency = asarray(recency)
        n = asarray(n)
        n_custs= asarray(n_custs)
        _check_inputs(frequency, recency, n)

        params, self._negative_log_likelihood_ = _fit(self._negative_log_likelihood,
                                                      [frequency, recency, n, n_custs, self.penalizer_coef],
                                                      0,
                                                      np.ones(4),
                                                      4,
                                                      verbose)
        self.params_ = OrderedDict(zip(['alpha','beta','gamma','delta'], params))
        self.data = DataFrame(vconcat[frequency, recency, n, n_custs],
                              columns=['frequency','recency','n','n_custs'])

        return self
Exemplo n.º 7
0
    def fit(self, frequency, monetary_value, iterative_fitting=4, initial_params=None, verbose=False, tol=1e-4):
        """
        This methods fits the data to the Gamma/Gamma model.

        Parameters:
            frequency: the frequency vector of customers' purchases (denoted x in literature).
            monetary_value: the monetary value vector of customer's purchases (denoted m in literature).
            iterative_fitting: perform iterative_fitting fits over random/warm-started initial params.
            initial_params: set initial params for the iterative fitter.
            verbose: set to true to print out convergence diagnostics.
            tol: tolerance for termination of the function minimization process.

        Returns:
            self, fitted and with parameters estimated
        """
        _check_inputs(frequency, monetary_value=monetary_value)

        params, self._negative_log_likelihood_ = _fit(self._negative_log_likelihood,
                                                      [frequency, monetary_value, self.penalizer_coef],
                                                      iterative_fitting,
                                                      initial_params,
                                                      3,
                                                      verbose,
                                                      tol)

        self.data = DataFrame(vconcat[frequency, monetary_value], columns=['frequency', 'monetary_value'])
        self.params_ = OrderedDict(zip(['p', 'q', 'v'], params))

        return self
Exemplo n.º 8
0
    def fit(self,
            frequency,
            recency,
            T,
            iterative_fitting=1,
            initial_params=None,
            verbose=False,
            tol=1e-4,
            index=None):
        """
        This methods fits the data to the BG/NBD model.

        Parameters:
            frequency: the frequency vector of customers' purchases (denoted x in literature).
            recency: the recency vector of customers' purchases (denoted t_x in literature).
            T: the vector of customers' age (time since first purchase)
            iterative_fitting: perform iterative_fitting fits over random/warm-started initial params
            initial_params: set the initial parameters for the fitter.
            verbose: set to true to print out convergence diagnostics.
            index: index for resulted DataFrame which is accessible via self.data


        Returns:
            self, with additional properties and methods like params_ and predict

        """
        frequency = asarray(frequency)
        recency = asarray(recency)
        T = asarray(T)
        _check_inputs(frequency, recency, T)

        self._scale = _scale_time(T)
        scaled_recency = recency * self._scale
        scaled_T = T * self._scale

        params, self._negative_log_likelihood_ = _fit(
            self._negative_log_likelihood,
            [frequency, scaled_recency, scaled_T, self.penalizer_coef],
            iterative_fitting, initial_params, 4, verbose, tol)

        self.params_ = OrderedDict(zip(['r', 'alpha', 'a', 'b'], params))
        self.params_['alpha'] /= self._scale

        self.data = DataFrame(vconcat[frequency, recency, T],
                              columns=['frequency', 'recency', 'T'])
        if index is not None:
            self.data.index = index
        self.generate_new_data = lambda size=1: beta_geometric_nbd_model(
            T, *self._unload_params('r', 'alpha', 'a', 'b'), size=size)

        self.predict = self.conditional_expected_number_of_purchases_up_to_time
        return self
Exemplo n.º 9
0
    def fit(self, frequency, recency, T, iterative_fitting=1, initial_params=None, verbose=False):
        """
        This methods fits the data to the BG/NBD model.

        Parameters:
            frequency: the frequency vector of customers' purchases (denoted x in literature).
            recency: the recency vector of customers' purchases (denoted t_x in literature).
            T: the vector of customers' age (time since first purchase)
            iterative_fitting: perform `iterative_fitting` additional fits to find the best
                parameters for the model. Setting to 0 will improve peformance but possibly
                hurt estimates.
            initial_params: set the initial parameters for the fitter.
            verbose: set to true to print out convergence diagnostics.


        Returns:
            self, with additional properties and methods like params_ and predict

        """
        frequency = asarray(frequency)
        recency = asarray(recency)
        T = asarray(T)
        _check_inputs(frequency, recency, T)

        self._scale = _scale_time(T)
        scaled_recency = recency * self._scale
        scaled_T = T * self._scale

        params, self._negative_log_likelihood_ = _fit(
            self._negative_log_likelihood,
            frequency,
            scaled_recency,
            scaled_T,
            iterative_fitting,
            self.penalizer_coef,
            initial_params,
            verbose,
        )

        self.params_ = OrderedDict(zip(["r", "alpha", "a", "b"], params))
        self.params_["alpha"] /= self._scale

        self.data = DataFrame(vconcat[frequency, recency, T], columns=["frequency", "recency", "T"])
        self.generate_new_data = lambda size=1: beta_geometric_nbd_model(
            T, *self._unload_params("r", "alpha", "a", "b"), size=size
        )

        self.predict = self.conditional_expected_number_of_purchases_up_to_time
        return self
Exemplo n.º 10
0
    def fit(self,
            frequency,
            recency,
            T,
            iterative_fitting=1,
            initial_params=None,
            verbose=False):
        """
        This methods fits the data to the BG/NBD model.

        Parameters:
            frequency: the frequency vector of customers' purchases (denoted x in literature).
            recency: the recency vector of customers' purchases (denoted t_x in literature).
            T: the vector of customers' age (time since first purchase)
            iterative_fitting: perform `iterative_fitting` additional fits to find the best
                parameters for the model. Setting to 0 will improve peformance but possibly
                hurt estimates.
            initial_params: set the initial parameters for the fitter.
            verbose: set to true to print out convergence diagnostics.


        Returns:
            self, with additional properties and methods like params_ and predict

        """
        frequency = asarray(frequency)
        recency = asarray(recency)
        T = asarray(T)
        _check_inputs(frequency, recency, T)

        self._scale = _scale_time(T)
        scaled_recency = recency * self._scale
        scaled_T = T * self._scale

        params, self._negative_log_likelihood_ = _fit(
            self._negative_log_likelihood, frequency, scaled_recency, scaled_T,
            iterative_fitting, self.penalizer_coef, initial_params, verbose)

        self.params_ = OrderedDict(zip(['r', 'alpha', 'a', 'b'], params))
        self.params_['alpha'] /= self._scale

        self.data = DataFrame(vconcat[frequency, recency, T],
                              columns=['frequency', 'recency', 'T'])
        self.generate_new_data = lambda size=1: beta_geometric_nbd_model(
            T, *self._unload_params('r', 'alpha', 'a', 'b'), size=size)

        self.predict = self.conditional_expected_number_of_purchases_up_to_time
        return self
Exemplo n.º 11
0
    def fit(self, frequency, recency, T, iterative_fitting=1, initial_params=None, verbose=False, tol=1e-4):
        """
        This methods fits the data to the BG/NBD model.

        Parameters:
            frequency: the frequency vector of customers' purchases (denoted x in literature).
            recency: the recency vector of customers' purchases (denoted t_x in literature).
            T: the vector of customers' age (time since first purchase)
            iterative_fitting: perform iterative_fitting fits over random/warm-started initial params
            initial_params: set the initial parameters for the fitter.
            verbose: set to true to print out convergence diagnostics.


        Returns:
            self, with additional properties and methods like params_ and predict

        """
        frequency = asarray(frequency)
        recency = asarray(recency)
        T = asarray(T)
        _check_inputs(frequency, recency, T)

        self._scale = _scale_time(T)
        scaled_recency = recency * self._scale
        scaled_T = T * self._scale

        params, self._negative_log_likelihood_ = _fit(self._negative_log_likelihood,
                                                      [frequency, scaled_recency, scaled_T, self.penalizer_coef],
                                                      iterative_fitting,
                                                      initial_params,
                                                      4,
                                                      verbose,
                                                      tol)

        self.params_ = OrderedDict(zip(['r', 'alpha', 'a', 'b'], params))
        self.params_['alpha'] /= self._scale

        self.data = DataFrame(vconcat[frequency, recency, T], columns=['frequency', 'recency', 'T'])
        self.generate_new_data = lambda size=1: beta_geometric_nbd_model(T, *self._unload_params('r', 'alpha', 'a', 'b'), size=size)

        self.predict = self.conditional_expected_number_of_purchases_up_to_time
        return self
Exemplo n.º 12
0
    def fit(self,
            frequency,
            recency,
            T,
            iterative_fitting=1,
            initial_params=None,
            verbose=False,
            tol=1e-4):
        """
        This methods fits the data to the Pareto/NBD model.

        Parameters:
            frequency: the frequency vector of customers' purchases (denoted x in literature).
            recency: the recency vector of customers' purchases (denoted t_x in literature).
            T: the vector of customers' age (time since first purchase)
            iterative_fitting: perform iterative_fitting fits over random/warm-started initial params
            initial_params: set initial params for the iterative fitter.
            verbose: set to true to print out convergence diagnostics.

        Returns:
            self, with additional properties and methods like params_ and plot

        """
        frequency = asarray(frequency)
        recency = asarray(recency)
        T = asarray(T)
        _check_inputs(frequency, recency, T)

        params, self._negative_log_likelihood_ = _fit(
            self._negative_log_likelihood,
            [frequency, recency, T, self.penalizer_coef], iterative_fitting,
            initial_params, 4, verbose, tol)

        self.params_ = OrderedDict(zip(['r', 'alpha', 's', 'beta'], params))
        self.data = DataFrame(vconcat[frequency, recency, T],
                              columns=['frequency', 'recency', 'T'])
        self.generate_new_data = lambda size=1: pareto_nbd_model(
            T, *params, size=size)

        self.predict = self.conditional_expected_number_of_purchases_up_to_time
        return self
Exemplo n.º 13
0
    def fit(self,
            frequency,
            recency,
            T,
            iterative_fitting=1,
            initial_params=None):
        """
        This methods fits the data to the Pareto/NBD model.

        Parameters:
            frequency: the frequency vector of customers' purchases (denoted x in literature).
            recency: the recency vector of customers' purchases (denoted t_x in literature).
            T: the vector of customers' age (time since first purchase)
            iterative_fitting: perform `iterative_fitting` additional fits to find the best
                parameters for the model. Setting to 0 will improve peformance but possibly
                hurt estimates.

        Returns:
            self, with additional properties and methods like params_ and plot

        """
        frequency = np.asarray(frequency)
        recency = np.asarray(recency)
        T = np.asarray(T)

        params, self._negative_log_likelihood_ = _fit(
            self._negative_log_likelihood, frequency, recency, T,
            iterative_fitting, self.penalizer_coef, initial_params)

        self.params_ = dict(zip(['r', 'alpha', 's', 'beta'], params))
        self.data = pd.DataFrame(np.c_[frequency, recency, T],
                                 columns=['frequency', 'recency', 'T'])
        self.generate_new_data = lambda size=1: pareto_nbd_model(
            T, *params, size=size)

        self.predict = self.conditional_expected_number_of_purchases_up_to_time
        return self