Exemplo n.º 1
0
def _train_rsf(x, t, e, folds, params):
    """Train one random survival forest per cross-validation fold.

    For every distinct value ``f`` found in ``folds`` a fresh
    ``RandomSurvivalForestModel`` is fitted on the rows where
    ``folds != f`` (i.e. fold ``f`` is held out).

    Parameters
    ----------
    x, t, e :
        Inputs forwarded to ``RandomSurvivalForestModel.fit`` in that order
        after boolean-mask row selection.
        NOTE(review): presumably features, durations and event indicators
        as NumPy arrays -- confirm against the caller.
    folds :
        Fold label of every row; must support elementwise ``!=`` producing
        a mask usable to index ``x``, ``t`` and ``e``.
    params : dict or None
        Optional hyper-parameters. Recognised keys are ``'num_trees'`` and
        ``'max_depth'``; a missing key (or ``params is None``) falls back to
        50 trees and a maximum depth of 4.

    Returns
    -------
    dict
        Maps each fold label to the model trained with that fold held out.
    """
    if params is None:
        params = {}
    num_trees = params.get('num_trees', 50)
    max_depth = params.get('max_depth', 4)

    fold_model = {}

    for f in set(folds):
        print("Starting Fold:", f)
        # Rows outside the current fold form the training set.
        train_mask = folds != f
        rsf = RandomSurvivalForestModel(num_trees=num_trees)
        rsf.fit(x[train_mask],
                t[train_mask],
                e[train_mask],
                max_depth=max_depth)
        fold_model[f] = copy.copy(rsf)
        print("Trained Fold:", f)
    return fold_model
def _train_rsf(x, t, e, folds):
    """Fit a default random survival forest for each held-out fold.

    For every distinct label in ``folds`` a new
    ``RandomSurvivalForestModel`` is fitted on the rows whose label differs
    from it, and a deep copy of the fitted model is stored under that label.

    Returns
    -------
    dict
        Fold label -> deep copy of the model trained without that fold.
    """
    models_by_fold = {}

    for held_out in set(folds):
        # Everything except the held-out fold is used for training.
        keep = folds != held_out
        forest = RandomSurvivalForestModel()
        forest.fit(x[keep], t[keep], e[keep])
        models_by_fold[held_out] = copy.deepcopy(forest)

    return models_by_fold
Exemplo n.º 3
0
 def _prep_model(X, T, E):
     """Create and fit a random survival forest on ``X``, ``T``, ``E``.

     ``num_tree``, ``max_depth`` and ``min_node`` are not parameters; they
     are read from the enclosing scope. Returns the fitted model.
     """
     forest = RandomSurvivalForestModel(num_trees=num_tree)
     fit_options = {
         'max_features': 'sqrt',
         'max_depth': max_depth,
         'min_node_size': min_node,
         'num_threads': -1,
         'sample_size_pct': 0.63,
         'importance_mode': 'normalized_permutation',
         'seed': None,
         'save_memory': False,
     }
     forest.fit(X, T, E, **fit_options)
     return forest
Exemplo n.º 4
0
def grid_search_and_retrain(X, T, E, num_tree, max_depth, min_node):
    """Pick hyper-parameters by grid search, then refit on all given data.

    ``grid_search_for_model`` is called with the candidate values and must
    return the selected ``(num_trees, max_depth, min_node_size)`` triple.
    A new ``RandomSurvivalForestModel`` is then fitted on ``X``, ``T``, ``E``
    with those values and a fixed seed of 954.

    Returns
    -------
    tuple
        ``(fitted_model, best_num_tree, best_max_depth, best_min_node)``.
    """
    best = grid_search_for_model(X, T, E, num_tree, max_depth, min_node)
    b_num_tree, b_max_depth, b_min_node = best

    forest = RandomSurvivalForestModel(num_trees=b_num_tree)
    fit_options = {
        'max_features': 'sqrt',
        'max_depth': b_max_depth,
        'min_node_size': b_min_node,
        'num_threads': -1,
        'sample_size_pct': 0.63,
        'importance_mode': 'normalized_permutation',
        'seed': 954,
        'save_memory': False,
    }
    forest.fit(X, T, E, **fit_options)
    return forest, b_num_tree, b_max_depth, b_min_node
Exemplo n.º 5
0
    def _model_factory(self,
                       n_trees=None,
                       n_input_features=None,
                       n_neurons=None):
        if self.algorithm == 'CPH':
            return CoxPHFitter()
        elif self.algorithm == 'RSF':
            return RandomSurvivalForestModel(num_trees=n_trees)
        elif self.algorithm in self._pycox_methods:
            net_args = {
                'in_features': n_input_features,
                'num_nodes': n_neurons,
                'batch_norm': True,
                'dropout': 0.1,
            }

            if self.algorithm == 'DeepSurv':
                net = tt.practical.MLPVanilla(out_features=1,
                                              output_bias=False,
                                              **net_args)
                model = CoxPH(net, tt.optim.Adam)

                return model
            if self.algorithm == 'CoxTime':
                net = MLPVanillaCoxTime(**net_args)
                model = CoxTime(net, tt.optim.Adam)

                return model
            if self.algorithm in self._discrete_time_methods:
                num_durations = 30
                print(f'   {num_durations} equidistant intervals')
            if self.algorithm == 'DeepHit':
                labtrans = DeepHitSingle.label_transform(num_durations)
                net = self._get_discrete_time_net(labtrans, net_args)
                model = DeepHitSingle(net,
                                      tt.optim.Adam,
                                      alpha=0.2,
                                      sigma=0.1,
                                      duration_index=labtrans.cuts)

                return model
            if self.algorithm == 'MTLR':
                labtrans = MTLR.label_transform(num_durations)
                net = self._get_discrete_time_net(labtrans, net_args)
                model = MTLR(net, tt.optim.Adam, duration_index=labtrans.cuts)

                return model
            if self.algorithm == 'Nnet-survival':
                labtrans = LogisticHazard.label_transform(num_durations)
                net = self._get_discrete_time_net(labtrans, net_args)
                model = LogisticHazard(net,
                                       tt.optim.Adam(0.01),
                                       duration_index=labtrans.cuts)

                return model
        else:
            raise Exception('Unrecognized model.')
Exemplo n.º 6
0
# Inputs for the random survival forest: the 'tenure' column supplies the
# durations and is removed from the feature matrix; y is passed to fit() as
# the third (event) argument.
X_rsf = X.drop('tenure',axis=1)
T_rsf = X['tenure'].values
E_rsf = y

# Reuse the row labels of an existing train/test split (X_train / X_test are
# defined outside this section).
index_train, index_test = X_train.index, X_test.index

X_rsf_train, X_rsf_test = X_rsf.loc[index_train,:], X_rsf.loc[index_test,:]
# NOTE(review): T_rsf is a NumPy array (.values), so the index labels are
# used here as positions -- this only selects the same rows as .loc above if
# X has a default 0..n-1 index; confirm.
T_rsf_train, T_rsf_test = T_rsf[index_train], T_rsf[index_test]
E_rsf_train, E_rsf_test = E_rsf[index_train], E_rsf[index_test]

# Kaplan-Meier model fitted on the test-split durations and events.
km_base_model = KaplanMeierModel()
km_base_model.fit(T_rsf_test, E_rsf_test)

# Only referenced by the commented-out randomized search below.
cv = RepeatedStratifiedKFold(n_splits=5,n_repeats=2,random_state=21)

# Forest fitted with fixed hyper-parameters; the commented-out lines are a
# disabled randomized hyper-parameter search.
rsf = RandomSurvivalForestModel(num_trees=200)
#rsf_num_trees = [100,200,300,500]
#rsf_max_depth = [5,10,15,20,25,30,35,40,45,50]
#rsf_min_node_size = [3, 5, 7, 9]
#param_grid_rsf = {'num_trees':rsf_num_trees,'max_depth':rsf_max_depth,'min_node_size':rsf_min_node_size}
rsf.fit(X_rsf_train,T_rsf_train,E_rsf_train,max_features='sqrt',max_depth=36,min_node_size=4,seed=21)
#rsf_cv = RandomizedSearchCV(rsf, param_distributions=param_grid_rsf, cv=cv,scoring='accuracy',random_state=42,)
#rsf_cv.fit(X_rsf_train,T_rsf_train,E_rsf_train)
# Evaluate the fitted forest on the test split and report both metrics.
c_index = concordance_index(rsf,X_rsf_test,T_rsf_test,E_rsf_test)
print('C-index: {:0.2f}'.format(c_index))
ibs = integrated_brier_score(rsf, X_rsf_test, T_rsf_test, E_rsf_test)
print('IBS: {:0.2f}'.format(ibs))

# Initializing the figure
fig, ax = plt.subplots(figsize=(8, 4))
Exemplo n.º 7
0
def load_model(path_file):
    """ Load the model and its parameters from a .zip file

    The file is first loaded into a generic ``BaseModel`` to read the stored
    model name; an empty model of the matching concrete class is then created
    and receives a deep copy of the loaded attributes.

    Parameters:
    -----------
    * path_file, str
        address of the file where the model will be loaded from

    Returns:
    --------
    * pysurvival_model : Pysurvival object
        Pysurvival model

    Raises:
    -------
    * NotImplementedError
        if the stored model name does not match any supported model
        (including an SVM name that is neither 'linear' nor 'kernel')
    """

    # Initializing a base model
    from pysurvival.models import BaseModel
    base_model = BaseModel()

    # Temporary loading the model
    base_model.load(path_file)
    model_name = base_model.name

    # All matching below is case-insensitive; lower-case the name once.
    name = model_name.lower()

    # Stays None when no branch recognises the name, so every unsupported
    # case ends in the same NotImplementedError below.
    pysurvival_model = None

    # Loading the actual Pysurvival model - Kaplan-Meier
    if 'kaplanmeier' in name:

        if 'smooth' in name:
            from pysurvival.models.non_parametric import SmoothKaplanMeierModel
            pysurvival_model = SmoothKaplanMeierModel()

        else:
            from pysurvival.models.non_parametric import KaplanMeierModel
            pysurvival_model = KaplanMeierModel()

    elif 'linearmultitask' in name:

        from pysurvival.models.multi_task import LinearMultiTaskModel
        pysurvival_model = LinearMultiTaskModel()

    elif 'neuralmultitask' in name:

        from pysurvival.models.multi_task import NeuralMultiTaskModel
        # Placeholder structure needed by the constructor; the attributes
        # copied from base_model at the end overwrite the instance state.
        structure = [
            {
                'activation': 'relu',
                'num_units': 128
            },
        ]
        pysurvival_model = NeuralMultiTaskModel(structure=structure)

    elif 'exponential' in name:

        from pysurvival.models.parametric import ExponentialModel
        pysurvival_model = ExponentialModel()

    elif 'weibull' in name:

        from pysurvival.models.parametric import WeibullModel
        pysurvival_model = WeibullModel()

    elif 'gompertz' in name:

        from pysurvival.models.parametric import GompertzModel
        pysurvival_model = GompertzModel()

    elif 'loglogistic' in name:

        from pysurvival.models.parametric import LogLogisticModel
        pysurvival_model = LogLogisticModel()

    elif 'lognormal' in name:

        from pysurvival.models.parametric import LogNormalModel
        pysurvival_model = LogNormalModel()

    elif 'simulation' in name:

        from pysurvival.models.simulations import SimulationModel
        pysurvival_model = SimulationModel()

    elif 'coxph' in name:

        if 'nonlinear' in name:
            from pysurvival.models.semi_parametric import NonLinearCoxPHModel
            pysurvival_model = NonLinearCoxPHModel()

        else:
            from pysurvival.models.semi_parametric import CoxPHModel
            pysurvival_model = CoxPHModel()

    elif 'random' in name and 'survival' in name:

        from pysurvival.models.survival_forest import RandomSurvivalForestModel
        pysurvival_model = RandomSurvivalForestModel()

    elif 'extra' in name and 'survival' in name:

        from pysurvival.models.survival_forest import ExtraSurvivalTreesModel
        pysurvival_model = ExtraSurvivalTreesModel()

    elif 'condi' in name and 'survival' in name:

        from pysurvival.models.survival_forest import ConditionalSurvivalForestModel
        pysurvival_model = ConditionalSurvivalForestModel()

    elif 'svm' in name:

        if 'linear' in name:

            from pysurvival.models.svm import LinearSVMModel
            pysurvival_model = LinearSVMModel()

        elif 'kernel' in name:

            from pysurvival.models.svm import KernelSVMModel
            pysurvival_model = KernelSVMModel()

    if pysurvival_model is None:
        raise NotImplementedError(
            '{} is not a valid pysurvival model.'.format(model_name))

    # Transferring the components
    pysurvival_model.__dict__.update(copy.deepcopy(base_model.__dict__))
    del base_model

    return pysurvival_model
Exemplo n.º 8
0
# Building training and testing sets
from sklearn.model_selection import train_test_split
index_train, index_test = train_test_split(range(N), test_size=0.4)
# The split is drawn over range(N), i.e. row positions, so rows are selected
# positionally; this matches .loc whenever df has a default 0..N-1 index.
data_train = df.iloc[index_train].reset_index(drop=True)
data_test = df.iloc[index_test].reset_index(drop=True)

# Creating the X, T and E inputs.
# The training inputs come from data_train only: taking them from the full
# df would put the test rows into the training set and inflate the metrics.
X_train, X_test = data_train[features], data_test[features]
T_train, T_test = data_train[time_column], data_test[time_column]
E_train, E_test = data_train[event_column], data_test[event_column]


#from pysurvival.models.survival_forest import ConditionalSurvivalForestModel
from pysurvival.models.survival_forest import RandomSurvivalForestModel
# Fitting the model (the variable keeps the name csf although a random
# survival forest is used, since the conditional forest import is disabled)
csf = RandomSurvivalForestModel(num_trees=200)
csf.fit(X_train, T_train, E_train, max_features='sqrt',
        max_depth=5, min_node_size=20)


# Bare expression: only displays something in an interactive session.
csf.variable_importance_table


from pysurvival.utils.metrics import concordance_index
c_index = concordance_index(csf, X_test, T_test, E_test)
# The 0.83 noted here previously was obtained while training on the full df.
print('C-index: {:.2f}'.format(c_index))


from pysurvival.utils.display import integrated_brier_score
ibs = integrated_brier_score(csf, X_test, T_test, E_test, t_max=12,
    figure_size=(12,5))
# Hyper-parameter grid: number of trees, maximum depth, minimum node size.
num_tree = (10, 15, 20, 50, 100)
max_depth = (1, 2, 3, 5, 10, 12, 15)
min_node = (1, 2, 3, 5, 10, 12)

# The splitter has a fixed integer seed, so one instance yields the same
# 7 folds for every hyper-parameter combination; no need to rebuild it.
kf = StratifiedKFold(n_splits=7, random_state=42, shuffle=True)

# Exhaustive search: print the mean cross-validated C-index per combination.
for a in num_tree:
    for b in max_depth:
        for c in min_node:
            cc = []
            for train_index, test_index in kf.split(Xtemp, Etemp):
                # kf.split yields row positions, hence .iloc (identical to
                # .loc when the index is the default 0..n-1 range).
                X1_train, X1_test = Xtemp.iloc[train_index], Xtemp.iloc[test_index]
                X_train, X_test = X1_train[featuresTemp], X1_test[featuresTemp]
                T_train, T_test = X1_train['NumDays'].values, X1_test['NumDays'].values
                E_train, E_test = Etemp.iloc[train_index].values, Etemp.iloc[test_index].values
                xst = RandomSurvivalForestModel(num_trees=a)
                xst.fit(X_train, T_train, E_train, max_features='sqrt',
                        max_depth=b, min_node_size=c, num_threads=-1,
                        sample_size_pct=0.63,
                        importance_mode='normalized_permutation',
                        seed=None, save_memory=False)
                c_index = concordance_index(xst, X_test, T_test, E_test)
                cc.append(c_index)
            print(a, b, c, mean(cc))


# Result accumulators and the hyper-parameters retained after the search.
CI = []
IBS = []
best_num_tree = 15
best_depth = 10
best_min_node = 5
def run_pysurvival_with_repetitions(data,
                                    features,
                                    survival,
                                    event,
                                    models,
                                    test_ratio,
                                    repetitions=10):
    """Repeatedly split ``data``, fit the requested models and collect metrics.

    On every repetition a fresh random train/test split is drawn and each
    model named in ``models`` is fitted on the training part and scored on
    the test part (concordance index and integrated Brier score).

    Parameters
    ----------
    data :
        Table indexed by row; columns are selected with ``features``,
        ``survival`` and ``event``.
    features :
        Column names used as model inputs.
    survival :
        Name of the column holding the durations.
    event :
        Name of the column holding the event indicators.
    models :
        Container checked for the keys ``'cox'``, ``'rsf'`` and
        ``'deepsurv'``.
    test_ratio :
        Forwarded to ``train_test_split`` as ``test_size``.
    repetitions : int, default 10
        Number of independent split/fit/score rounds.

    Returns
    -------
    The structure created by ``initialize_outputs(models, features)``, with
    one ``'c_index'`` and one ``'ibs'`` value appended per repetition for
    each model, plus per-feature ``'weights'`` for Cox and ``'importance'``
    for the random survival forest.
    """
    num_samples = len(data.index)
    print('Number of Samples:', num_samples)

    # Initialize outputs
    outputs = initialize_outputs(models, features)

    # Run the survival models `repetitions` times
    for _ in range(repetitions):
        # Dataset splitting. The split is drawn over row positions
        # (range(num_samples)), so rows are selected with .iloc; .loc would
        # pick wrong rows or fail when `data` has a non-default index.
        index_train, index_test = train_test_split(range(num_samples),
                                                   test_size=test_ratio)
        data_train = data.iloc[index_train].reset_index(drop=True)
        data_test = data.iloc[index_test].reset_index(drop=True)

        X_train, X_test = data_train[features], data_test[features]
        T_train, T_test = data_train[survival].values, data_test[
            survival].values
        E_train, E_test = data_train[event].values, data_test[event].values

        # Run Cox
        if 'cox' in models:
            coxph = CoxPHModel()
            coxph.fit(X_train,
                      T_train,
                      E_train,
                      lr=0.0001,
                      l2_reg=1e-2,
                      init_method='zeros',
                      verbose=False)
            c_index = concordance_index(coxph, X_test, T_test, E_test)
            outputs['cox']['c_index'].append(c_index)
            ibs = integrated_brier_score(coxph,
                                         X_test,
                                         T_test,
                                         E_test,
                                         t_max=None)
            outputs['cox']['ibs'].append(ibs)
            # Weights are stored per feature, in the order of `features`.
            for idx, feature in enumerate(features):
                outputs['cox']['weights'][feature].append(coxph.weights[idx])

        # Run RSF
        if 'rsf' in models:
            rsf = RandomSurvivalForestModel(num_trees=200)
            rsf.fit(X_train,
                    T_train,
                    E_train,
                    max_features="sqrt",
                    max_depth=5,
                    min_node_size=20)
            c_index = concordance_index(rsf, X_test, T_test, E_test)
            outputs['rsf']['c_index'].append(c_index)
            ibs = integrated_brier_score(rsf,
                                         X_test,
                                         T_test,
                                         E_test,
                                         t_max=None)
            outputs['rsf']['ibs'].append(ibs)
            for key, value in rsf.variable_importance.items():
                outputs['rsf']['importance'][key].append(value)

        # Run Deepsurv
        if 'deepsurv' in models:
            # Three identical hidden layers of 128 ReLU units.
            structure = [{
                'activation': 'ReLU',
                'num_units': 128
            }, {
                'activation': 'ReLU',
                'num_units': 128
            }, {
                'activation': 'ReLU',
                'num_units': 128
            }]

            nonlinear_coxph = NonLinearCoxPHModel(structure=structure)
            nonlinear_coxph.fit(X_train,
                                T_train,
                                E_train,
                                lr=1e-4,
                                init_method='xav_uniform',
                                verbose=False)
            c_index = concordance_index(nonlinear_coxph, X_test, T_test,
                                        E_test)
            outputs['deepsurv']['c_index'].append(c_index)
            ibs = integrated_brier_score(nonlinear_coxph,
                                         X_test,
                                         T_test,
                                         E_test,
                                         t_max=None)
            outputs['deepsurv']['ibs'].append(ibs)

    return outputs
Exemplo n.º 11
0
 def build_random_forest(self, num_trees=500):
     """Create a ``RandomSurvivalForestModel`` and store it on ``self.model``.

     ``num_trees`` is forwarded to the model constructor (default 500).
     Nothing is returned and the model is not fitted here.
     """
     forest = RandomSurvivalForestModel(num_trees=num_trees)
     self.model = forest