def _get_tail_stats(self, X):
    """Approximate the tail sigma and standard error.

    A log-linear regression is fitted to ``X.sigma_`` and to ``X.std_err_``
    and each fit is evaluated at the tail's weighted-average development
    period, as returned by ``_get_tail_weighted_time_period``.

    Parameters
    ----------
    X : Triangle-like
        Object exposing ``sigma_`` and ``std_err_``, each with a ``values``
        array.  It is not modified by this method.

    Returns
    -------
    tuple
        ``(sigma_, std_err_)``: the extrapolated arrays.
    """
    time_pd = self._get_tail_weighted_time_period(X)
    reg = WeightedRegression(axis=3).fit(None, np.log(X.sigma_.values), None)
    sigma_ = np.exp(time_pd * reg.slope_ + reg.intercept_)
    # Work on a copy: blanking zeros directly on ``X.std_err_.values`` would
    # silently overwrite the caller's data with NaN.
    y = X.std_err_.values.copy()
    # log(0) is -inf, so zero standard errors are replaced with NaN.
    y[y == 0] = np.nan
    reg = WeightedRegression(axis=3).fit(None, np.log(y), None)
    std_err_ = np.exp(time_pd * reg.slope_ + reg.intercept_)
    return sigma_, std_err_
def _get_tail_weighted_time_period(self, X):
    """Approximate the weighted-average development age of the tail.

    A log-linear regression is fitted to ``log(ldf - 1)`` of ``X.ldf_`` and
    then inverted to find the period at which the fitted curve equals the
    product of this estimator's trailing LDFs.

    Parameters
    ----------
    X : Triangle-like
        Object exposing ``ldf_`` with a ``values`` array.

    Returns
    -------
    array-like
        ``(log(tail - 1) - intercept) / slope`` from the fitted regression.
    """
    y = X.ldf_.values.copy()
    # log(y - 1) is undefined for LDFs at or below 1, so mark them as NaN.
    y[y <= 1] = np.nan
    # Fitted once; an identical second fit would only repeat the work.
    reg = WeightedRegression(axis=3).fit(None, np.log(y - 1), None)
    # Product of the last ``_ave_period[0] + 1`` LDFs of this estimator.
    tail = np.prod(
        self.ldf_.values[..., -self._ave_period[0] - 1:], -1, keepdims=True)
    time_pd = (np.log(tail - 1) - reg.intercept_) / reg.slope_
    return time_pd
def _get_tail_stats(self, X):
    """Extrapolate sigma and standard error to the tail period.

    Each statistic is log-transformed, fitted with a ``WeightedRegression``
    along axis 3, and evaluated at the period returned by
    ``_get_tail_weighted_time_period``.

    Parameters
    ----------
    X : Triangle-like
        Object exposing ``sigma_`` and ``std_err_``.

    Returns
    -------
    tuple
        ``(sigma_, std_err_)``: the extrapolated arrays.
    """
    from chainladder.utils.utility_functions import num_to_nan

    time_pd = self._get_tail_weighted_time_period(X)
    xp = X.sigma_.get_array_module()

    def _log_linear(values):
        # Fit log(values) and evaluate the fitted line at the tail period.
        fitted = WeightedRegression(axis=3, xp=xp).fit(
            None, xp.log(values), None)
        return xp.exp(time_pd * fitted.slope_ + fitted.intercept_)

    sigma_ = _log_linear(X.sigma_.values)
    # ``num_to_nan`` is applied to the standard errors before the log.
    std_err_ = _log_linear(num_to_nan(X.std_err_.values))
    return sigma_, std_err_
def _get_x(self, w, y):
    """Return the regressor for the configured curve, or ``None``.

    Parameters
    ----------
    w : array-like
        Regression weights.
    y : array-like
        Regression response.

    Returns
    -------
    array-like or None
        For ``curve == 'inverse_power'``, the log of the ``x`` inferred by
        ``WeightedRegression``; otherwise ``None``.
    """
    if self.curve == 'inverse_power':
        inferred = WeightedRegression(3, False).fit(None, y, w).infer_x_w()
        return np.log(inferred.x)
    # Exponential decay needs no transformation on x; any other curve value
    # also yields None.
    return None
def _get_x(self, w, y):
    """Return the regressor for the configured curve, or ``None``.

    Parameters
    ----------
    w : array-like
        Regression weights.
    y : array-like
        Regression response.

    Returns
    -------
    array-like or None
        For ``curve == "inverse_power"``, the log of the ``x`` inferred by
        ``WeightedRegression`` (computed with the array module of
        ``self.ldf_``); otherwise ``None``.
    """
    if self.curve == "inverse_power":
        xp = self.ldf_.get_array_module()
        inferred = WeightedRegression(3, False, xp=xp).fit(None, y, w)
        return xp.log(inferred.infer_x_w().x)
    # Exponential decay needs no transformation on x; any other curve value
    # also yields None.
    return None
def fit(self, X, y=None, sample_weight=None):
    """Fit the model with X.

    Parameters
    ----------
    X : Triangle-like
        Set of LDFs to which the tail will be applied.
    y : Ignored
    sample_weight : Ignored

    Returns
    -------
    self : object
        Returns the instance itself.

    Raises
    ------
    TypeError
        If ``fit_period`` is not a ``slice``.
    ZeroDivisionError
        If ``errors == 'raise'`` and any LDF is not greater than 1.0.
    """
    super().fit(X, y, sample_weight)
    xp = cp.get_array_module(self.ldf_.values)
    _y = self.ldf_.values[..., :X.shape[-1] - 1].copy()
    _w = xp.zeros(_y.shape)
    if not isinstance(self.fit_period, slice):
        raise TypeError('fit_period must be slice.')
    # Only development periods inside ``fit_period`` take part in the fit.
    _w[..., self.fit_period] = 1.0
    if self.errors == 'ignore':
        # log(_y - 1) is undefined for LDFs <= 1: give them zero weight and
        # a placeholder value so the log below stays finite.
        _w[_y <= 1.0] = 0
        _y[_y <= 1.0] = 1.01
    elif self.errors == 'raise' and xp.any(_y <= 1.0):
        # The guard must inspect the LDFs (``_y``).  The ``y`` argument
        # defaults to None, and comparing None with a float raises
        # TypeError instead of the intended error.
        raise ZeroDivisionError(
            'Tail fit requires all LDFs to be greater than 1.0')
    _y = xp.log(_y - 1)
    n_obs = X.shape[-1] - 1
    _x = self._get_x(_w, _y)
    # Get LDFs
    coefs = WeightedRegression(axis=3).fit(_x, _y, _w)
    slope, intercept = coefs.slope_, coefs.intercept_
    # Period counter 1..(extrap_periods + n_obs) along the last axis.
    extrapolate = xp.cumsum(
        xp.ones(tuple(list(_y.shape)[:-1] + [self.extrap_periods + n_obs])),
        -1)
    tail = self._predict_tail(slope, intercept, extrapolate)
    if self.attachment_age:
        # First development index at or beyond the attachment age.
        attach_idx = xp.min(xp.where(X.ddims >= self.attachment_age))
    else:
        attach_idx = len(X.ddims) - 1
    # Keep the existing LDFs before the attachment point and the fitted
    # curve from there on.
    self.ldf_.values = xp.concatenate(
        (self.ldf_.values[..., :attach_idx], tail[..., attach_idx:]), -1)
    obj = Development().fit_transform(X) if 'ldf_' not in X else X
    sigma, std_err = self._get_tail_stats(obj)
    self.sigma_.values[..., -1] = sigma[..., -1]
    self.std_err_.values[..., -1] = std_err[..., -1]
    self.slope_ = slope
    self.intercept_ = intercept
    self.cdf_ = DevelopmentBase._get_cdf(self)
    return self
def fit(self, X, y=None, sample_weight=None):
    """Fit the model with X.

    Parameters
    ----------
    X : Triangle-like
        Set of LDFs to which the tail will be applied.
    y : Ignored
    sample_weight : Ignored

    Returns
    -------
    self : object
        Returns the instance itself.

    Raises
    ------
    TypeError
        If ``fit_period`` is not a ``slice``.
    ZeroDivisionError
        If ``errors == 'raise'`` and any LDF is not greater than 1.0.
    """
    super().fit(X, y, sample_weight)
    _y = self.ldf_.values[..., :-1].copy()
    _w = np.zeros(_y.shape)
    if not isinstance(self.fit_period, slice):
        raise TypeError('fit_period must be slice.')
    # Only development periods inside ``fit_period`` take part in the fit.
    _w[..., self.fit_period] = 1.0
    if self.errors == 'ignore':
        # log(_y - 1) is undefined for LDFs <= 1: give them zero weight and
        # a placeholder value so the log below stays finite.
        _w[_y <= 1.0] = 0
        _y[_y <= 1.0] = 1.01
    elif self.errors == 'raise' and np.any(_y <= 1.0):
        # The guard must inspect the LDFs (``_y``).  The ``y`` argument
        # defaults to None, and comparing None with a float raises
        # TypeError instead of the intended error.
        raise ZeroDivisionError(
            'Tail fit requires all LDFs to be greater than 1.0')
    _y = np.log(_y - 1)
    n_obs = X.shape[-1] - 1
    _x = self._get_x(_w, _y)
    # Get LDFs
    coefs = WeightedRegression(axis=3).fit(_x, _y, _w)
    slope, intercept = coefs.slope_, coefs.intercept_
    # Period counter (n_obs + 1)..(n_obs + extrap_periods) on the last axis.
    extrapolate = np.cumsum(
        np.ones(tuple(list(_y.shape)[:-1] + [self.extrap_periods])),
        -1) + n_obs
    tail = self._predict_tail(slope, intercept, extrapolate)
    # Drop as many trailing LDFs as the predicted tail provides, then
    # append the tail in their place.
    self.ldf_.values = self.ldf_.values[..., :-tail.shape[-1]]
    self.ldf_.values = np.concatenate((self.ldf_.values, tail), -1)
    if X.__dict__.get('ldf_', None) is None:
        obj = Development().fit_transform(X)
    else:
        obj = X
    sigma, std_err = self._get_tail_stats(obj)
    self.sigma_.values[..., -1] = sigma[..., -1]
    self.std_err_.values[..., -1] = std_err[..., -1]
    self.slope_ = slope
    self.intercept_ = intercept
    self.cdf_ = DevelopmentBase._get_cdf(self)
    return self
def _get_tail_stats(self, X):
    """Extrapolate sigma and standard error and store them on ``self``.

    When ``X`` has no ``sigma_`` attribute, ``self.sigma_`` and
    ``self.std_err_`` are set to ``None``.  Otherwise each statistic is
    log-linearly extrapolated to the tail period and the last entry along
    the final axis of ``self.sigma_.values`` / ``self.std_err_.values`` is
    replaced with the extrapolated value.

    Parameters
    ----------
    X : Triangle-like
        Object that may expose ``sigma_`` and ``std_err_``.

    Returns
    -------
    None
    """
    from chainladder.utils.utility_functions import num_to_nan

    if not hasattr(X, 'sigma_'):
        # Nothing to extrapolate from.
        self.sigma_ = None
        self.std_err_ = None
        return

    time_pd = self._get_tail_weighted_time_period(X)
    xp = X.sigma_.get_array_module()

    def _log_linear(values):
        # Fit log(values) and evaluate the fitted line at the tail period.
        fitted = WeightedRegression(axis=3, xp=xp).fit(
            None, xp.log(values), None)
        return xp.exp(time_pd * fitted.slope_ + fitted.intercept_)

    sigma_ = _log_linear(X.sigma_.values)
    std_err_ = _log_linear(num_to_nan(X.std_err_.values))

    # Swap only the final entry of each statistic for its tail estimate.
    kept_sigma = self.sigma_.values[..., :-1]
    self.sigma_.values = xp.concatenate(
        (kept_sigma, sigma_[..., -1:]), axis=-1)
    kept_std_err = self.std_err_.values[..., :-1]
    self.std_err_.values = xp.concatenate(
        (kept_std_err, std_err_[..., -1:]), axis=-1)
def fit(self, X, y=None, sample_weight=None):
    """Fit the model with X.

    Estimates development factors by weighted regression through the origin
    of each development period on the prior one, and stores ``ldf_``,
    ``sigma_``, ``std_err_``, ``w_`` and ``std_residuals_`` on the instance.

    Parameters
    ----------
    X : Triangle-like
        Triangle expressed with development lags.
    y : None
        Ignored
    sample_weight : Ignored

    Returns
    -------
    self : object
        Returns the instance itself.

    Raises
    ------
    ValueError
        If ``X.ddims`` is not a ``numpy.ndarray``.
    """
    from chainladder.utils.utility_functions import num_to_nan

    # Sparse input is moved to the numpy backend; anything else is copied.
    X = X.set_backend("numpy") if X.array_backend == "sparse" else X.copy()
    xp = X.get_array_module()
    if type(X.ddims) != np.ndarray:
        raise ValueError(
            "Triangle must be expressed with development lags")

    raw_values = (X + self.fillna).values if self.fillna else X.values.copy()
    tri_array = num_to_nan(raw_values)
    n_links = tri_array.shape[-1] - 1

    # Scalar settings are repeated once per development link.
    if type(self.average) is list:
        self.average_ = np.array(self.average)
    else:
        self.average_ = np.array([self.average] * n_links)
    if type(self.n_periods) is list:
        period_list = self.n_periods
    else:
        period_list = [self.n_periods] * n_links
    self.n_periods_ = np.array(period_list)

    # Named averaging methods map to the exponent applied to x in the
    # regression weight; unrecognised entries are passed through as-is.
    exponent_by_method = {"regression": 0, "volume": 1, "simple": 2}
    x_vals = tri_array[..., :-1]
    y_vals = tri_array[..., 1:]
    exponents = xp.array(
        [exponent_by_method.get(item, item) for item in self.average_])
    val = xp.nan_to_num(exponents[None, None, None] * (y_vals * 0 + 1))

    link_ratio = y_vals / x_vals
    period_weight = self._assign_n_periods_weight(X)
    drop_weight = self._drop_adjustment(X, link_ratio)
    self.w_ = xp.array(period_weight * drop_weight)
    w = self.w_ / (x_vals ** val)

    reg = WeightedRegression(axis=2, thru_orig=True, xp=xp).fit(
        x_vals, y_vals, w)
    if self.n_periods == 1:
        warnings.warn("Setting n_periods=1 does not allow enough degrees "
                      "of freedom to support calculation of all regression"
                      " statistics. Only LDFs have been calculated.")
    else:
        reg = reg.sigma_fill(self.sigma_interpolation)

    # Where the regression produced no standard error (NaN), fall back to
    # sigma scaled by the first origin row of sqrt(x ** (2 - val)).
    has_std_err = xp.nan_to_num(reg.std_err_ * 0 + 1)
    scale = xp.swapaxes(
        xp.sqrt(x_vals ** (2 - val))[..., 0:1, :], -1, -2)
    reg.std_err_ = xp.nan_to_num(reg.std_err_) + xp.nan_to_num(
        (1 - has_std_err) * reg.sigma_ / scale)

    stacked = xp.concatenate((reg.slope_, reg.sigma_, reg.std_err_), 3)
    stacked = xp.swapaxes(stacked, 2, 3)
    self.ldf_ = self._param_property(X, stacked, 0)
    self.sigma_ = self._param_property(X, stacked, 1)
    self.std_err_ = self._param_property(X, stacked, 2)

    # Standardised residuals: (actual - fitted) / sqrt(sigma^2 / w).
    resid = -X.iloc[..., :-1] * self.ldf_.values + X.iloc[..., 1:].values
    std = xp.sqrt((1 / num_to_nan(w)) * (self.sigma_**2).values)
    resid = resid / std
    self.std_residuals_ = resid[resid.valuation < X.valuation_date]
    return self
def fit(self, X, y=None, sample_weight=None):
    """Fit the model with X.

    Parameters
    ----------
    X : Triangle-like
        Set of LDFs to which the tail will be applied.
    y : Ignored
    sample_weight : Ignored

    Returns
    -------
    self : object
        Returns the instance itself.

    Raises
    ------
    ZeroDivisionError
        If ``errors == "raise"`` and any LDF is not greater than 1.0.
    """
    # Sparse input is moved to the numpy backend; anything else is copied.
    if X.array_backend == "sparse":
        X = X.set_backend("numpy")
    else:
        X = X.copy()

    if isinstance(self.fit_period, slice):
        warnings.warn(
            "Slicing for fit_period is deprecated and will be removed. Please use a tuple (start_age, end_age)."
        )
        fit_period = self.fit_period
    else:
        # Convert (start_age, end_age) into positional indices using the
        # development grain; a None bound leaves that side open.
        grain = {"Y": 12, "Q": 3, "M": 1}[X.development_grain]
        start = (None if self.fit_period[0] is None
                 else int(self.fit_period[0] / grain - 1))
        end = (None if self.fit_period[1] is None
               else int(self.fit_period[1] / grain - 1))
        fit_period = slice(start, end, None)

    super().fit(X, y, sample_weight)
    xp = self.ldf_.get_array_module()
    _y = self.ldf_.values[..., :X.shape[-1] - 1].copy()
    _w = xp.zeros(_y.shape)
    _w[..., fit_period] = 1.0

    # The lower threshold is never allowed below 1.0 because the regression
    # is on log(ldf - 1).
    lower = self.reg_threshold[0]
    if lower is None:
        warnings.warn(
            "Lower threshold for ldfs not set. Lower threshold will be set to 1.0 to ensure "
            "valid inputs for regression.")
        lower_threshold = 1
    elif lower < 1:
        warnings.warn(
            "Lower threshold for ldfs set too low (<1). Lower threshold will be set to 1.0 to ensure "
            "valid inputs for regression.")
        lower_threshold = 1
    else:
        lower_threshold = lower

    upper_threshold = self.reg_threshold[1]
    if upper_threshold is not None and upper_threshold <= lower_threshold:
        warnings.warn(
            "Can't set upper threshold for ldfs below lower threshold. Upper threshold will be set to 'None'."
        )
        upper_threshold = None

    if self.errors == "ignore":
        # LDFs outside the thresholds get zero weight and a placeholder
        # value so the log below stays finite.
        if upper_threshold is None:
            excluded = _y <= lower_threshold
        else:
            excluded = (_y <= lower_threshold) | (_y > upper_threshold)
        _w[excluded] = 0
        _y[excluded] = 1.01
    elif self.errors == "raise" and xp.any(_y <= 1.0):
        # The guard must inspect the LDFs (``_y``).  The ``y`` argument
        # defaults to None, and comparing None with a float raises
        # TypeError instead of the intended error.
        raise ZeroDivisionError(
            "Tail fit requires all LDFs to be greater than 1.0")

    _y = xp.log(_y - 1)
    n_obs = X.shape[-1] - 1
    _x = self._get_x(_w, _y)
    # Get LDFs
    coefs = WeightedRegression(axis=3, xp=xp).fit(_x, _y, _w)
    self._slope_, self._intercept_ = coefs.slope_, coefs.intercept_
    # Period counter 1..(extrap_periods + n_obs) along the last axis.
    extrapolate = xp.cumsum(
        xp.ones(tuple(list(_y.shape)[:-1] + [self.extrap_periods + n_obs])),
        -1)
    tail = self._predict_tail(extrapolate)
    if self.attachment_age:
        # First development index at or beyond the attachment age.
        attach_idx = xp.min(xp.where(X.ddims >= self.attachment_age))
    else:
        attach_idx = len(X.ddims) - 1
    # Keep the existing LDFs before the attachment point and the fitted
    # curve from there on.
    self.ldf_.values = xp.concatenate(
        (self.ldf_.values[..., :attach_idx], tail[..., attach_idx:]), -1)
    obj = Development().fit_transform(X) if "ldf_" not in X else X
    self._get_tail_stats(obj)
    return self
def fit(self, X, y=None, sample_weight=None):
    """Fit the model with X.

    Estimates development factors by weighted regression through the origin
    of each development period on the prior one, and stores ``ldf_``,
    ``sigma_``, ``std_err_`` and ``w_`` on the instance.

    Parameters
    ----------
    X : Triangle-like
        Triangle expressed with development lags.
    y : Ignored
    sample_weight : Ignored

    Returns
    -------
    self : object
        Returns the instance itself.

    Raises
    ------
    ValueError
        If ``X.ddims`` is not a ``numpy.ndarray``.
    """
    from chainladder.utils.utility_functions import num_to_nan

    # Sparse input is moved to the numpy backend; anything else is
    # deep-copied.
    if X.array_backend == 'sparse':
        X = X.set_backend('numpy')
    else:
        X = copy.deepcopy(X)
    xp = X.get_array_module()
    if type(X.ddims) != np.ndarray:
        raise ValueError(
            'Triangle must be expressed with development lags')

    raw_values = (X + self.fillna).values if self.fillna else X.values.copy()
    tri_array = num_to_nan(raw_values)
    n_links = tri_array.shape[-1] - 1

    # Scalar settings are repeated once per development link.
    if type(self.average) is list:
        average = np.array(self.average)
    else:
        average = np.array([self.average] * n_links)
    self.average_ = average
    if type(self.n_periods) is list:
        period_list = self.n_periods
    else:
        period_list = [self.n_periods] * n_links
    self.n_periods_ = np.array(period_list)

    # Named averaging methods map to the exponent applied to x in the
    # regression weight; unrecognised names fall back to 1.
    exponent_by_method = {'regression': 0, 'volume': 1, 'simple': 2}
    x_vals = tri_array[..., :-1]
    y_vals = tri_array[..., 1:]
    val = xp.array(
        [exponent_by_method.get(item.lower(), 1) for item in average])
    # Tile the per-link exponents across the three leading axes.
    for axis_len in reversed(tri_array.shape[:3]):
        val = xp.repeat(val[None], axis_len, axis=0)
    val = xp.nan_to_num(val * (y_vals * 0 + 1))

    link_ratio = y_vals / x_vals
    period_weight = self._assign_n_periods_weight(X)
    drop_weight = self._drop_adjustment(X, link_ratio)
    self.w_ = xp.array(period_weight * drop_weight)
    w = self.w_ / (x_vals ** val)

    reg = WeightedRegression(axis=2, thru_orig=True, xp=xp).fit(
        x_vals, y_vals, w)
    if self.n_periods == 1:
        warnings.warn('Setting n_periods=1 does not allow enough degrees '
                      'of freedom to support calculation of all regression'
                      ' statistics. Only LDFs have been calculated.')
    else:
        reg = reg.sigma_fill(self.sigma_interpolation)

    # Where the regression produced no standard error (NaN), fall back to
    # sigma scaled by the first origin row of sqrt(x ** (2 - val)).
    has_std_err = xp.nan_to_num(reg.std_err_ * 0 + 1)
    scale = xp.swapaxes(
        xp.sqrt(x_vals ** (2 - val))[..., 0:1, :], -1, -2)
    reg.std_err_ = xp.nan_to_num(reg.std_err_) + xp.nan_to_num(
        (1 - has_std_err) * reg.sigma_ / scale)

    stacked = xp.concatenate((reg.slope_, reg.sigma_, reg.std_err_), 3)
    stacked = xp.swapaxes(stacked, 2, 3)
    self.ldf_ = self._param_property(X, stacked, 0)
    self.sigma_ = self._param_property(X, stacked, 1)
    self.std_err_ = self._param_property(X, stacked, 2)
    return self
def fit(self, X, y=None, sample_weight=None):
    """Fit the model with X.

    Parameters
    ----------
    X : Triangle-like
        Set of LDFs to which the tail will be applied.
    y : Ignored
    sample_weight : Ignored

    Returns
    -------
    self : object
        Returns the instance itself.

    Raises
    ------
    ZeroDivisionError
        If ``errors == 'raise'`` and any LDF is not greater than 1.0.
    """
    if isinstance(self.fit_period, slice):
        warnings.warn(
            "Slicing for fit_period is deprecated and will be removed. Please use a tuple (start_age, end_age)."
        )
        fit_period = self.fit_period
    else:
        # Convert (start_age, end_age) into positional indices using the
        # development grain; a None bound leaves that side open.
        grain = {'Y': 12, 'Q': 3, 'M': 1}[X.development_grain]
        start = None if self.fit_period[0] is None else int(
            self.fit_period[0] / grain - 1)
        end = None if self.fit_period[1] is None else int(
            self.fit_period[1] / grain - 1)
        fit_period = slice(start, end, None)
    super().fit(X, y, sample_weight)
    xp = cp.get_array_module(self.ldf_.values)
    _y = self.ldf_.values[..., :X.shape[-1] - 1].copy()
    _w = xp.zeros(_y.shape)
    # Only development periods inside ``fit_period`` take part in the fit.
    _w[..., fit_period] = 1.0
    if self.errors == 'ignore':
        # log(_y - 1) is undefined for LDFs <= 1: give them zero weight and
        # a placeholder value so the log below stays finite.
        _w[_y <= 1.0] = 0
        _y[_y <= 1.0] = 1.01
    elif self.errors == 'raise' and xp.any(_y <= 1.0):
        # The guard must inspect the LDFs (``_y``).  The ``y`` argument
        # defaults to None, and comparing None with a float raises
        # TypeError instead of the intended error.
        raise ZeroDivisionError(
            'Tail fit requires all LDFs to be greater than 1.0')
    _y = xp.log(_y - 1)
    n_obs = X.shape[-1] - 1
    _x = self._get_x(_w, _y)
    # Get LDFs
    coefs = WeightedRegression(axis=3).fit(_x, _y, _w)
    self._slope_, self._intercept_ = coefs.slope_, coefs.intercept_
    # Period counter 1..(extrap_periods + n_obs) along the last axis.
    extrapolate = xp.cumsum(
        xp.ones(tuple(list(_y.shape)[:-1] + [self.extrap_periods + n_obs])),
        -1)
    tail = self._predict_tail(extrapolate)
    if self.attachment_age:
        # First development index at or beyond the attachment age.
        attach_idx = xp.min(xp.where(X.ddims >= self.attachment_age))
    else:
        attach_idx = len(X.ddims) - 1
    # Keep the existing LDFs before the attachment point and the fitted
    # curve from there on.
    self.ldf_.values = xp.concatenate(
        (self.ldf_.values[..., :attach_idx], tail[..., attach_idx:]), -1)
    obj = Development().fit_transform(X) if 'ldf_' not in X else X
    sigma, std_err = self._get_tail_stats(obj)
    self.sigma_.values[..., -1] = sigma[..., -1]
    self.std_err_.values[..., -1] = std_err[..., -1]
    return self
def fit(self, X, y=None, sample_weight=None):
    """Fit the model with X.

    Estimates development factors by weighted regression through the origin
    of each development period on the prior one, and stores ``average_``,
    ``w_``, ``ldf_``, ``sigma_``, ``std_err_`` and ``std_residuals_`` on the
    instance.

    Parameters
    ----------
    X : Triangle-like
        Triangle to fit; incremental input is converted to cumulative
        before fitting.
    y : None
        Ignored
    sample_weight : Ignored

    Returns
    -------
    self : object
        Returns the instance itself.

    Raises
    ------
    ValueError
        If the prepared triangle's ``ddims`` is not a ``numpy.ndarray``.
    """
    from chainladder.utils.utility_functions import num_to_nan

    # Validate inputs: group, make cumulative if needed, express in
    # development form, and work on a copy.
    is_incremental = X.is_cumulative == False
    grouped = self._set_fit_groups(X)
    if is_incremental:
        grouped = grouped.incr_to_cum()
    obj = grouped.val_to_dev().copy()
    xp = obj.get_array_module()

    # Make sure it is a dev tri
    if type(obj.ddims) != np.ndarray:
        raise ValueError("Triangle must be expressed with development lags")

    # Validate hyperparameters
    raw_values = (
        (obj + self.fillna).values if self.fillna else obj.values.copy())
    tri_array = num_to_nan(raw_values)
    self.average_ = np.array(
        self._validate_axis_assumption(self.average, obj.development[:-1]))
    n_periods_ = self._validate_axis_assumption(
        self.n_periods, obj.development[:-1])

    # Named averaging methods map to the exponent applied to x in the
    # regression weight; unrecognised entries are passed through as-is.
    exponent_by_method = {"regression": 0, "volume": 1, "simple": 2}
    x_vals = tri_array[..., :-1]
    y_vals = tri_array[..., 1:]
    exponent = xp.array(
        [exponent_by_method.get(item, item) for item in self.average_])
    exponent = xp.nan_to_num(exponent[None, None, None] * (y_vals * 0 + 1))

    link_ratio = y_vals / x_vals
    period_weight = self._assign_n_periods_weight(obj, n_periods_)
    drop_weight = self._drop_adjustment(obj, link_ratio)
    self.w_ = period_weight * drop_weight
    w = self.w_ / (x_vals ** exponent)

    reg = WeightedRegression(axis=2, thru_orig=True, xp=xp).fit(
        x_vals, y_vals, w)
    if self.n_periods == 1:
        warnings.warn(
            "Setting n_periods=1 does not allow enough degrees "
            "of freedom to support calculation of all regression"
            " statistics. Only LDFs have been calculated."
        )
    else:
        reg = reg.sigma_fill(self.sigma_interpolation)

    # Where the regression produced no standard error (NaN), fall back to
    # sigma scaled by the first origin row of sqrt(x ** (2 - exponent)).
    has_std_err = xp.nan_to_num(reg.std_err_ * 0 + 1)
    scale = xp.swapaxes(
        xp.sqrt(x_vals ** (2 - exponent))[..., 0:1, :], -1, -2)
    reg.std_err_ = xp.nan_to_num(reg.std_err_) + xp.nan_to_num(
        (1 - has_std_err) * reg.sigma_ / scale)

    stacked = xp.concatenate((reg.slope_, reg.sigma_, reg.std_err_), 3)
    stacked = xp.swapaxes(stacked, 2, 3)
    self.ldf_ = self._param_property(obj, stacked, 0)
    self.sigma_ = self._param_property(obj, stacked, 1)
    self.std_err_ = self._param_property(obj, stacked, 2)

    # Standardised residuals: (actual - fitted) / sqrt(sigma^2 / w).
    resid = -obj.iloc[..., :-1] * self.ldf_.values + obj.iloc[..., 1:].values
    std = xp.sqrt((1 / num_to_nan(w)) * (self.sigma_**2).values)
    resid = resid / std
    self.std_residuals_ = resid[resid.valuation < obj.valuation_date]
    return self