# 6-> Simplest absorbing state case for validation purposes

# DURATION TYPE DATASETS (Long format)
# 7-> S&P Credit Rating Migration Matrix
# 8-> Simplest absorbing state case for validation purposes (Duration estimator)
# 9-> Example with dates in string formats

dataset = 9

#
# Duration type datasets in Compact Format
#
if dataset == 1:
    # Dataset 1: simulated transitions of a single entity, saved as
    # synthetic_data1.csv under dataset_path.
    # State space definition: four states keyed '0'-'3' and labelled A-D.
    myState = tm.StateSpace([('0', "A"), ('1', "B"), ('2', "C"), ('3', "D")])
    # myState.describe()
    # Generator arguments:
    #   n: number of entities
    #   sample: number of samples per entity
    #   rate: NOTE(review) presumably the exponential transition rate - confirm
    data = dataset_generators.exponential_transitions(myState,
                                                      n=1,
                                                      sample=100,
                                                      rate=0.1)
    # Order the rows by entity ID and then by time, and write them out
    # without the pandas index column.
    sorted_data = data.sort_values(['ID', 'Time'], ascending=[True, True])
    sorted_data.to_csv(dataset_path + 'synthetic_data1.csv', index=False)

elif dataset == 2:
    # Second example: Multiple Entities observed over continuous short time interval
    myState = tm.StateSpace([('0', "Basic"), ('1', "Default")])
    data = dataset_generators.exponential_transitions(myState,
                                                      n=1000,
Пример #2
0
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
# either express or implied. See the License for the specific language governing permissions and
# limitations under the License.

# Example script. Open Risk Academy Course Step 4.


import pandas as pd
import transitionMatrix as tm
from transitionMatrix.estimators import cohort_estimator as es

# Location of the example datasets, relative to the working directory.
dataset_path = "../../datasets/"

# Load the synthetic two-state dataset; 'State' is read as a string so that it
# matches the string keys used in the state space below.
data = pd.read_csv(dataset_path + 'synthetic_data2.csv', dtype={'State': str})
sorted_data = data.sort_values(['ID', 'Time'], ascending=[True, True])

# Two-state space ("Basic" / "Default"); print its description and the result
# of validating the sorted dataset against it.
myState = tm.StateSpace([('0', "Basic"), ('1', "Default")])
myState.describe()
print(myState.validate_dataset(dataset=sorted_data))

# Bin the timestamps into 5 cohorts.
# NOTE(review): the unsorted `data` frame is passed here, not `sorted_data` -
# confirm that bin_timestamps does not rely on row order.
cohort_data, cohort_intervals = tm.utils.bin_timestamps(data, cohorts=5)

# Fit a cohort estimator using the 'goodman' confidence-interval method with
# alpha = 0.05. `labels` is forwarded to fit() together with the binned data.
myEstimator = es.CohortEstimator(states=myState, ci={'method': 'goodman', 'alpha': 0.05})
labels = {'Timestamp': 'Cohort', 'State': 'State', 'ID': 'ID'}
result = myEstimator.fit(cohort_data, labels=labels)

# Wrap the fit result in a matrix set declared as 'Incremental', then print the
# temporal type and the matrices.
myMatrixSet = tm.TransitionMatrixSet(values=result, temporal_type='Incremental')
print(myMatrixSet.temporal_type)
myMatrixSet.print_matrix()
import transitionMatrix as tm
from transitionMatrix import source_path
from transitionMatrix.estimators import simple_estimator as es

# Datasets live in the "datasets/" directory under the package source path.
dataset_path = source_path + "datasets/"

# Example: LendingClub-style migration matrix set.
# The historical data loaded further below is expected in the format
# (ID, State_IN, State_OUT).

# State space: ten (key, label) pairs - loan grades A-G followed by the
# Delinquent, Charged Off and Repaid states.
definition = [('A', "Grade A"), ('B', "Grade B"), ('C', "Grade C"),
               ('D', "Grade D"), ('E', "Grade E"), ('F', "Grade F"),
               ('G', "Grade G"), ('H', "Delinquent"), ('I', "Charged Off"),
               ('J', "Repaid")]
myState = tm.StateSpace(definition)

# Load the data sets into a pandas frame in sequence
# Check matrix_lendingclub.py for comments

matrix_set = []
for letter in ['a', 'b', 'c', 'd']:
    # store the derived one-period matrices in a list
    data = pd.read_csv(dataset_path + 'LoanStats3' + letter + '_Step2.csv')
    myEstimator = es.SimpleEstimator(states=myState, ci={'method': 'goodman', 'alpha': 0.05})
    result = myEstimator.fit(data)
    myEstimator.summary()
    myMatrix = tm.TransitionMatrix(result)
    myMatrix[7, 9] = 1.0
    myMatrix[8, 9] = 1.0
    myMatrix[9, 9] = 1.0
Пример #4
0
 def test_validate_dataset(self):
     """Validating test.csv against a three-stage state space reports the expected states."""
     # 'State' is read as a string so it lines up with the string state keys.
     frame = pd.read_csv(source_path + "datasets/" + 'test.csv', dtype={'State': str})
     stages = [('0', "Stage 1"), ('1', "Stage 2"), ('2', "Stage 3")]
     state_space = tm.StateSpace(stages)
     outcome = state_space.validate_dataset(dataset=frame)
     # Only the first element of the validation result is checked.
     self.assertEqual(outcome[0], "Dataset contains the expected states.")
Пример #5
0
                      bottom=0.1,
                      right=0.9,
                      top=0.9,
                      wspace=0,
                      hspace=0.4)
    f.suptitle(summary, fontsize=12)
    plt.show()

elif example == 3:
    #
    #  Histogram Plots of transition frequencies
    #
    # Load the dataset with 'State' as a string (matching the string state
    # keys below) and order the rows by entity ID and then by timestep.
    data = pd.read_csv('../datasets/synthetic_data5.csv', dtype={'State': str})
    sorted_data = data.sort_values(['ID', 'Timestep'], ascending=[True, True])
    # Three-stage state space; its description is printed.
    description = [('0', "Stage 1"), ('1', "Stage 2"), ('2', "Stage 3")]
    myState = tm.StateSpace(description)
    myState.describe()
    # Cohort estimator using the 'goodman' confidence-interval method with
    # alpha = 0.05, fitted on the sorted data.
    myEstimator = es.CohortEstimator(states=myState,
                                     ci={
                                         'method': 'goodman',
                                         'alpha': 0.05
                                     })
    result = myEstimator.fit(sorted_data)

    # Packaging step: flatten the fit result into one list holding row s of
    # result[k] for every index k of the result and every state index s,
    # ordered by k first and s second.
    viz_data = []
    for k in range(len(result)):
        for s in range(len(myState.get_states())):
            raw_data = result[k][s, :]
            viz_data.append(raw_data)
Пример #6
0
 def test_generic(self):
     """After generic(n=10), the last state label equals str(9)."""
     size = 10
     last = size - 1
     space = tm.StateSpace()
     space.generic(n=size)
     labels = space.get_state_labels()
     self.assertEqual(labels[last], str(last))
Пример #7
0
 def test_get_state_labels(self):
     """The first label of an eight-grade rating scale is 'AAA'."""
     grades = ["AAA", "AA", "A", "BBB", "BB", "B", "CCC", "D"]
     # Keys are the grade positions rendered as strings: '0' .. '7'.
     pairs = [(str(position), grade) for position, grade in enumerate(grades)]
     space = tm.StateSpace(pairs)
     labels = space.get_state_labels()
     self.assertEqual(labels[0], 'AAA')
Пример #8
0
 def test_instantiate_state(self):
     """The label of the first state is available through `description`."""
     grades = ("AAA", "AA", "A", "BBB", "BB", "B", "CCC", "D")
     # Keys are the grade positions rendered as strings: '0' .. '7'.
     space = tm.StateSpace([(str(key), grade) for key, grade in enumerate(grades)])
     first_entry = space.description[0]
     self.assertEqual(first_entry[1], 'AAA')
Пример #9
0
    def test_cohort_estimator_matrix(self):
        """
        Test that the estimated matrix is the same as the matrix that was used to generate the data

        matrix = [[0.8, 0.15, 0.05],
                  [0.1, 0.7, 0.2],
                  [0.0, 0.0, 1.0]]

        Every cell of the estimator's average matrix is compared with the
        corresponding cell of the generating matrix to ACCURATE_DIGITS places.
        """
        # The generating matrix from the docstring, used as the single source
        # of expected values.
        expected = [[0.8, 0.15, 0.05],
                    [0.1, 0.7, 0.2],
                    [0.0, 0.0, 1.0]]

        dataset_path = source_path + "datasets/"
        data = pd.read_csv(dataset_path + 'synthetic_data5.csv')
        definition = [('0', "Stage 1"), ('1', "Stage 2"), ('2', "Stage 3")]
        myState = tm.StateSpace(definition)
        sorted_data = data.sort_values(['ID', 'Time'], ascending=[True, True])
        myEstimator = es.CohortEstimator(states=myState,
                                         cohort_bounds=[0, 1, 2, 3, 4],
                                         ci={
                                             'method': 'goodman',
                                             'alpha': 0.05
                                         })
        # The return value of fit() is not needed here: the assertions read
        # average_matrix from the estimator after fitting.
        myEstimator.fit(sorted_data)
        am = myEstimator.average_matrix

        # Check the cells in row-major order; the message names the cell so a
        # failure can be located without counting assertion lines.
        for i, row in enumerate(expected):
            for j, value in enumerate(row):
                self.assertAlmostEqual(am[i, j],
                                       value,
                                       places=ACCURATE_DIGITS,
                                       msg="average_matrix[%d, %d]" % (i, j))
Пример #10
0
dataset_path = source_path + "datasets/"

# Select the example to run
# 1-> An example with limited data (dataset contains only one entity)
# 2-> A full example with a 2x2 matrix
# 3-> A full example with an 8x8 matrix

example = 4

if example == 1:

    # An example with limited data (dataset contains only one entity)
    data = pd.read_csv(dataset_path + 'synthetic_data1.csv',
                       dtype={'State': str})
    sorted_data = data.sort_values(['ID', 'Time'], ascending=[True, True])
    myState = tm.StateSpace([('0', "A"), ('1', "B"), ('2', "C"), ('3', "D")])
    print("> Validate data set")
    print(myState.validate_dataset(dataset=sorted_data))
    # Bin the data into 5 intervals
    cohort_data, cohort_intervals = tm.utils.bin_timestamps(data, cohorts=5)
    print("> Cohort intervals: ", cohort_intervals)
    print(80 * '=')
    print("> Cohort data")
    print(cohort_data)
    myEstimator = es.CohortEstimator(states=myState,
                                     ci={
                                         'method': 'goodman',
                                         'alpha': 0.05
                                     })
    labels = {'Time': 'Cohort', 'State': 'State', 'ID': 'ID'}
    print(80 * '=')