Example #1
def test_grid(clrd):
    # Load Data
    medmal_paid = clrd.groupby("LOB").sum().loc["medmal"]["CumPaidLoss"]
    medmal_prem = (clrd.groupby("LOB").sum().loc["medmal"]
                   ["EarnedPremDIR"].latest_diagonal)

    # Pipeline
    dev = cl.Development()
    tail = cl.TailCurve()
    benk = cl.Benktander()

    steps = [("dev", dev), ("tail", tail), ("benk", benk)]
    pipe = cl.Pipeline(steps)

    # Prep Benktander Grid Search with various assumptions, and a scoring function
    param_grid = dict(benk__n_iters=[250], benk__apriori=[1.00])
    scoring = {"IBNR": lambda x: x.named_steps.benk.ibnr_.sum()}

    grid = cl.GridSearch(pipe, param_grid, scoring=scoring)
    # Perform Grid Search
    grid.fit(medmal_paid, benk__sample_weight=medmal_prem)
    assert (grid.results_["IBNR"][0] == cl.Benktander(
        n_iters=250, apriori=1).fit(
            cl.TailCurve().fit_transform(
                cl.Development().fit_transform(medmal_paid)),
            sample_weight=medmal_prem,
        ).ibnr_.sum())
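
The benk__-prefixed keys follow the sklearn convention for routing parameters to a named pipeline step: benk__n_iters targets the n_iters parameter of the step registered as "benk", and benk__sample_weight passes sample_weight through to that step's fit. A minimal sketch of a grid that actually varies the assumptions (the values below are illustrative, not from the test):

# Illustrative grid: GridSearch fits the pipeline once per parameter combination
param_grid = dict(benk__n_iters=[1, 2, 250], benk__apriori=[0.90, 1.00, 1.10])
grid = cl.GridSearch(pipe, param_grid, scoring=scoring)
grid.fit(medmal_paid, benk__sample_weight=medmal_prem)
# results_ holds one row per combination; each scoring key becomes a column
print(grid.results_[['benk__n_iters', 'benk__apriori', 'IBNR']])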
Example #2
def test_grid():
    # Load Data
    clrd = cl.load_sample('clrd')
    medmal_paid = clrd.groupby('LOB').sum().loc['medmal']['CumPaidLoss']
    medmal_prem = (clrd.groupby('LOB').sum().loc['medmal']
                   ['EarnedPremDIR'].latest_diagonal)
    # Label the premium diagonal's development axis
    medmal_prem.rename('development', ['premium'])

    # Pipeline
    dev = cl.Development()
    tail = cl.TailCurve()
    benk = cl.Benktander()

    steps = [('dev', dev), ('tail', tail), ('benk', benk)]
    pipe = cl.Pipeline(steps)

    # Prep Benktander Grid Search with various assumptions, and a scoring function
    param_grid = dict(benk__n_iters=[250], benk__apriori=[1.00])
    scoring = {'IBNR': lambda x: x.named_steps.benk.ibnr_.sum()}

    grid = cl.GridSearch(pipe, param_grid, scoring=scoring)
    # Perform Grid Search
    grid.fit(medmal_paid, benk__sample_weight=medmal_prem)
    assert grid.results_['IBNR'][0] == cl.Benktander(n_iters=250, apriori=1).fit(
        cl.TailCurve().fit_transform(cl.Development().fit_transform(medmal_paid)),
        sample_weight=medmal_prem,
    ).ibnr_.sum()
Example #3
def test_pipeline():
    tri = cl.load_sample('clrd').groupby('LOB').sum()[[
        'CumPaidLoss', 'IncurLoss', 'EarnedPremDIR'
    ]]
    tri['CaseIncurredLoss'] = tri['IncurLoss'] - tri['CumPaidLoss']

    X = tri[['CumPaidLoss', 'CaseIncurredLoss']]
    sample_weight = tri['EarnedPremDIR'].latest_diagonal

    dev = [
        cl.Development(),
        cl.ClarkLDF(),
        cl.Trend(),
        cl.IncrementalAdditive(),
        cl.MunichAdjustment(paid_to_incurred=('CumPaidLoss',
                                              'CaseIncurredLoss')),
        cl.CaseOutstanding(paid_to_incurred=('CumPaidLoss',
                                             'CaseIncurredLoss'))
    ]
    tail = [cl.TailCurve(), cl.TailConstant(), cl.TailBondy(), cl.TailClark()]
    ibnr = [
        cl.Chainladder(),
        cl.BornhuetterFerguson(),
        cl.Benktander(n_iters=2),
        cl.CapeCod()
    ]

    # Fit all 6 x 4 x 4 = 96 (dev, tail, ibnr) combinations through a Pipeline
    for model in itertools.product(dev, tail, ibnr):
        print(model)
        cl.Pipeline(
            steps=[('dev', model[0]), ('tail', model[1]), ('ibnr', model[2])]
        ).fit_predict(X, sample_weight=sample_weight).ibnr_.sum(
            'origin').sum('columns').sum()
Example #4
def test_pipeline_json_io():
    pipe = cl.Pipeline(
        steps=[('dev', cl.Development()), ('model', cl.BornhuetterFerguson())])
    pipe2 = cl.read_json(pipe.to_json())
    assert {item[0]: item[1].get_params()
            for item in pipe.get_params()['steps']} == \
           {item[0]: item[1].get_params()
            for item in pipe2.get_params()['steps']}
Example #5
def test_pipeline(tri, dev, tail, ibnr):
    X = tri[['CumPaidLoss', 'CaseIncurredLoss']]
    sample_weight = tri['EarnedPremDIR'].latest_diagonal
    cl.Pipeline(
        steps=[('dev', dev()), ('tail', tail()), ('ibnr', ibnr())]
    ).fit_predict(X, sample_weight=sample_weight).ibnr_.sum(
        'origin').sum('columns').sum()
Example #6
def test_pipeline_json_io():
    pipe = cl.Pipeline(
        steps=[("dev", cl.Development()), ("model", cl.BornhuetterFerguson())]
    )
    pipe2 = cl.read_json(pipe.to_json())
    assert {item[0]: item[1].get_params() for item in pipe.get_params()["steps"]} == {
        item[0]: item[1].get_params() for item in pipe2.get_params()["steps"]
    }
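
Because to_json returns a plain JSON string and cl.read_json accepts one, the round-trip shown in these tests extends naturally to persisting a pipeline definition on disk. A minimal sketch (the file name is illustrative):

import chainladder as cl

pipe = cl.Pipeline(
    steps=[("dev", cl.Development()), ("model", cl.BornhuetterFerguson())]
)

# Write the (unfitted) pipeline definition to disk ...
with open("pipeline.json", "w") as f:
    f.write(pipe.to_json())

# ... and rebuild an equivalent pipeline from it later
with open("pipeline.json") as f:
    pipe2 = cl.read_json(f.read())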
Example #7
def get_triangle_projections(triangles,
                             average_methods=None,
                             n_periods=None,
                             grain='OYDY'):
    """
        Generates the main kpis such as ultimate loss, ibnr, loss development factors   
        Arguments --> A dictionnary of triangles or a single triangle,   
            the methods to derive the LDF (simple or volume average) defined as a list if there are several ultimate triangles to produce,   
            the number of periods to look at (-1 means all periods by default)   
            the origin/development pattern ('OxDy' with x and y in (Y, M, Q))   
        Returns --> a dictionnary storing the triangles and other kpis   
            the dict keys are 'ldf' for loss development factors, 'cdf' for the cumulative ones, 'fit' to get the fitted model and 'full_triangle' to get the full triangle produced

    """

    triangles_values = (triangles.values() if isinstance(triangles, dict)
                        else [triangles])
    triangles_keys = triangles.keys() if isinstance(triangles, dict) else [1]

    selected_average_methods = (
        ['volume'] * len(triangles_keys) if average_methods is None
        else average_methods if isinstance(average_methods, list)
        else [average_methods]
    )

    selected_n_periods = (
        [-1] * len(triangles_keys) if n_periods is None
        else n_periods if isinstance(n_periods, list)
        else [n_periods]
    )

    # Gets the different types of figures we are studying (asif cost, cost excl LL, count, etc.)
    triangles_names = [triangle.columns[0] for triangle in triangles_values]

    # Builds the triangle transformer with development attributes; loops through the triangles
    triangles_dev = [
        cl.Pipeline([
            ('dev', cl.Development(average=selected_average_methods[index],
                                   n_periods=selected_n_periods[index]))
        ]).fit_transform(triangle.grain(grain))
        for index, triangle in enumerate(triangles_values)
    ]

    # Loops through the triangles_dev to derive the ldfs, cdfs and the fit method
    triangles_model = [(triangle_dev.ldf_, triangle_dev.cdf_,
                        cl.Chainladder().fit(triangle_dev))
                       for triangle_dev in triangles_dev]

    # Loops through the triangles_model to build a dict with the name of the figures (claims cost, count, etc.)
    # as primary key and the main triangle characteristics as second keys
    return {
        value: {
            'ldf': triangles_model[index][0],
            'cdf': triangles_model[index][1],
            'fit': triangles_model[index][2],
            'full_triangle': pd.concat(
                [triangles_model[index][2].full_triangle_.to_frame(),
                 triangles_model[index][2].ibnr_.to_frame()],
                axis=1).rename(columns={9999: 'Ultimates', value: 'IBNR'})
        }
        for index, value in enumerate(triangles_names)
    }
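
A minimal usage sketch for get_triangle_projections, assuming chainladder as cl and pandas as pd are imported and using the bundled clrd sample (the triangle selection is illustrative):

import chainladder as cl

# Two figures to project: paid and incurred medical-malpractice losses
medmal = cl.load_sample('clrd').groupby('LOB').sum().loc['medmal']
triangles = {'paid': medmal['CumPaidLoss'], 'incurred': medmal['IncurLoss']}

# Defaults: volume-average LDFs over all periods at an OYDY grain
projections = get_triangle_projections(triangles)

# Primary keys come from each triangle's column name
paid_kpis = projections['CumPaidLoss']
print(paid_kpis['ldf'])            # loss development factors
print(paid_kpis['fit'].ultimate_)  # fitted Chainladder ultimates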
Example #8
import pandas as pd
import chainladder as cl

# Premium rate-change history (the earlier dates are truncated in the source)
rate_history = pd.DataFrame({
    'date': [
        # ... earlier dates truncated ...
        '1/1/2005', '1/1/2006', '1/1/2007', '1/1/2008'
    ],
    'rate_change': [.02, .02, .02, .02, .05, .075, .15, .1, -.2, -.2]
})

# Loss on-leveling factors
tort_reform = pd.DataFrame({
    'date': ['1/1/2006', '1/1/2007'],
    'rate_change': [-0.1067, -.25]
})

# In addition to development, include onlevel estimator in pipeline for loss
pipe = cl.Pipeline(steps=[
    ('olf', cl.ParallelogramOLF(tort_reform, change_col='rate_change',
                                date_col='date', vertical_line=True)),
    ('dev', cl.Development(n_periods=2)),
    ('model', cl.CapeCod(trend=0.034))
])

# Define X
xyz = cl.load_sample('xyz')
X = xyz['Incurred']

# Separately apply on-level factors for premium
sample_weight = cl.ParallelogramOLF(
    rate_history, change_col='rate_change', date_col='date',
    vertical_line=True
).fit_transform(xyz['Premium'].latest_diagonal)

# Fit the CapeCod estimator; sample_weight is routed to the 'model' step
# (assumed fit call; the source snippet is truncated here)
pipe.fit(X, model__sample_weight=sample_weight)
Example #9
"""
====================================================
Basic Assumption Tuning with Pipeline and Gridsearch
====================================================

This example demonstrates testing several values of n_periods in the
development transformer to see their influence on the overall ultimate estimate.
"""

import seaborn as sns
sns.set_style('whitegrid')

import chainladder as cl

tri = cl.load_sample('abc')

# Set up Pipeline
steps = [('dev', cl.Development()), ('chainladder', cl.Chainladder())]
params = dict(dev__n_periods=list(range(2, 11)))
pipe = cl.Pipeline(steps=steps)

# Develop scoring function that returns an Ultimate/Incurred Ratio
scoring = lambda x: (
    x.named_steps.chainladder.ultimate_.sum() / tri.latest_diagonal.sum()
)

# Run GridSearch
grid = cl.GridSearch(pipe, params, scoring).fit(tri)

# Plot Results
grid.results_.plot(x='dev__n_periods', y='score',
                   marker='o').set(ylabel='Ultimate / Incurred')