def _CalculateSurvivalFunction_pysurvival(self, x, folder):
    """Average the survival curves predicted by an ensemble of saved models.

    Fifty model archives named ``DeepSurv1.zip`` .. ``DeepSurv50.zip`` are
    loaded from ``./<folder>/``. For each model, the first row returned by
    ``predict_survival(x)`` is linearly interpolated from the model's own
    ``times`` onto a common grid of 240 points between 12 and 120, and the
    per-point mean and standard deviation across the ensemble are computed.

    Args:
        x: Input forwarded unchanged to each model's ``predict_survival``.
        folder: Name of the directory (relative to the working directory)
            that holds the model archives; must be a string.

    Returns:
        dict with keys ``'time'`` (the common grid), ``'s_mean'``,
        ``'s_std'`` and ``'runtime'`` (elapsed wall-clock seconds).

    Side effects:
        Prints the elapsed time to stdout.
    """
    tic = time.time()

    # Phase 1: load the complete ensemble before any prediction is made.
    ensemble = []
    for idx in range(1, 51):
        archive = './' + folder + '/DeepSurv' + str(idx) + '.zip'
        ensemble.append(load_model(archive))

    # Phase 2: evaluate every member on one shared grid.
    t = np.linspace(12, 120, 240)

    def _curve_on_grid(member):
        # Only the first predicted curve is used.
        survival = member.predict_survival(x)[0]
        return interpolate.interp1d(member.times, survival)(t)

    curves = [_curve_on_grid(member) for member in ensemble]

    mean_curve = np.mean(curves, axis=0)
    std_curve = np.std(curves, axis=0)

    elapsed = time.time() - tic
    print('Total Time = %f seconds' % elapsed)
    # self._PlotFigure(mean_curve, std_curve, x, t)

    result = {}
    result['time'] = t
    result['s_mean'] = mean_curve
    result['s_std'] = std_curve
    result['runtime'] = elapsed
    return result
Exemplo n.º 2
0
def get_predicted_curves(cutaneous_biopsy_ulceration, scenario, cutaneous_biopsy_histological_subtype, cutaneous_biopsy_breslow,
    total_count_slnb_ldn, visceral_metastasis_location,total_positives_slnb_ldn,patient_hair_color,
    cutaneous_biopsy_lymphatic_invasion,patient_eye_color,cutaneous_biopsy_mitotic_index,age,
    patient_phototype, cutaneous_biopsy_satellitosis, MC1R,cutaneous_biopsy_vascular_invasion,
    cutaneous_biopsy_regression,LAB2419,T0_date,LAB2406,LAB1307,patient_gender,LAB2469,LAB2544,
    neutrofils_per_limfocits,cutaneous_biopsy_neurotropism,LAB2467,LAB1309,primary_tumour_location_coded,
    LAB2476,LAB2679,LAB2404, cutaneous_biopsy_predominant_cell_type, LAB2407,LAB1301,LAB2498):
    """
    Return the x and y values of the predicted survivorship curve for one
    patient, together with the patient's risk group.

    @args:
    - cutaneous_biopsy_ulceration ... LAB2498: values describing the patient
      and the patient's tests. They are placed, in signature order, into a
      single-row DataFrame whose column labels are read from
      data/Features_ExtraST_model.csv; a constant ``missBIO2 = 0`` is inserted
      right after ``T0_date``, so that file must list 37 features in exactly
      that order.
    @returns:
    - curve_x: the x values for the survivorship curve (1, 2, ..., len(curve_y)).
      This indicates time in months.
    - curve_y: the y values for the survivorship curve (at most the first 120
      predicted points). This indicates probability to survive.
    - risk_group: whatever ``identify_risk_group_df`` returns for the
      preprocessed row and the loaded model.
    """

    # Load the model
    estimator_loaded = load_model('data/ExtraST_model.zip')

    # Load the features used by the model (second column of the CSV)
    features = pd.read_csv("data/Features_ExtraST_model.csv").iloc[:, 1]

    # Not exposed as an argument; always supplied as 0.
    missBIO2 = 0

    test = pd.DataFrame([[cutaneous_biopsy_ulceration, scenario, cutaneous_biopsy_histological_subtype, cutaneous_biopsy_breslow,
        total_count_slnb_ldn, visceral_metastasis_location, total_positives_slnb_ldn, patient_hair_color,
        cutaneous_biopsy_lymphatic_invasion, patient_eye_color, cutaneous_biopsy_mitotic_index, age,
        patient_phototype, cutaneous_biopsy_satellitosis, MC1R, cutaneous_biopsy_vascular_invasion,
        cutaneous_biopsy_regression, LAB2419, T0_date, missBIO2, LAB2406, LAB1307, patient_gender, LAB2469, LAB2544,
        neutrofils_per_limfocits, cutaneous_biopsy_neurotropism, LAB2467, LAB1309, primary_tumour_location_coded,
        LAB2476, LAB2679, LAB2404, cutaneous_biopsy_predominant_cell_type, LAB2407, LAB1301, LAB2498]], columns=features)

    # Missing values are encoded as 0
    test = test.fillna(0)

    # Target encoding + normalization
    test = preprocess_newdata(test)

    # Select only the features used by the classifier
    test = test[features.values]

    # Predict the survival curve and keep at most its first 120 points
    curve_y = estimator_loaded.predict_survival(test.values).flatten()[0:120]
    curve_x = np.arange(1, len(curve_y) + 1, 1)

    risk_group = identify_risk_group_df(test, estimator_loaded)

    return curve_x, curve_y, risk_group
Exemplo n.º 3
0
def identify_risk_group(test_df):
    """
    Return the risk group (1, 2 or 3) of every patient in ``test_df``.

    The model is read from trained_models/ExtraST_model.zip, the feature list
    from trained_models/Features_ExtraST_model.csv and the thresholds from the
    first row of thresholds.csv (columns 'Threshold1-2', 'Threshold2-3' and
    'Normalization_max').

    The normalized risk is ``log(predict_risk) / Normalization_max`` and the
    groups are:
        1  if risk <  Threshold1-2
        2  if Threshold1-2 <= risk < Threshold2-3
        3  otherwise (including NaN risks)

    @args:
    - test_df: DataFrame of patients. It is not modified; missing values are
      filled with 0 on a copy.
    @returns:
    - 1-D float numpy array with one group label per patient.

    For instance, it can be called:
    identify_risk_group(pd.read_csv('data/train.csv'))
    """

    ############# Preprocess data according to the model #############

    # Load the model
    estimator_loaded = load_model('trained_models/ExtraST_model.zip')

    # Load the features used by the model (second column of the CSV)
    features = pd.read_csv('trained_models/Features_ExtraST_model.csv').iloc[:, 1]

    # Missing values are encoded as 0. fillna returns a new frame, so the
    # caller's DataFrame is left untouched.
    test_df = test_df.fillna(0)

    # Target encoding + normalization
    test_df = preprocess_newdata(test_df)

    # Select only the features used by the classifier
    test_df = test_df[features.values]

    ############# Identify risk group #############

    # Read risk encoding information
    thr_info = pd.read_csv('thresholds.csv')
    thr_12 = thr_info['Threshold1-2'][0]
    thr_23 = thr_info['Threshold2-3'][0]
    norm_info = thr_info['Normalization_max'][0]

    # Normalized log-risk of each patient
    risk = np.asarray(np.log(estimator_loaded.predict_risk(test_df)) / norm_info)

    # A risk exactly equal to thr_12 used to fall through to group 3 because
    # both strict comparisons failed; it now belongs to group 2.
    risk_group = np.where(risk < thr_12, 1.0,
                          np.where(risk < thr_23, 2.0, 3.0))

    return risk_group
Exemplo n.º 4
0
def identify_risk_group(
        cutaneous_biopsy_ulceration, scenario,
        cutaneous_biopsy_histological_subtype, cutaneous_biopsy_breslow,
        total_count_slnb_ldn, visceral_metastasis_location,
        total_positives_slnb_ldn, patient_hair_color,
        cutaneous_biopsy_lymphatic_invasion, patient_eye_color,
        cutaneous_biopsy_mitotic_index, age, patient_phototype,
        cutaneous_biopsy_satellitosis, MC1R,
        cutaneous_biopsy_vascular_invasion, cutaneous_biopsy_regression,
        LAB2419, T0_date, LAB2406, LAB1307, patient_gender, LAB2469, LAB2544,
        neutrofils_per_limfocits, cutaneous_biopsy_neurotropism, LAB2467,
        LAB1309, primary_tumour_location_coded, LAB2476, LAB2679, LAB2404,
        cutaneous_biopsy_predominant_cell_type, LAB2407, LAB1301, LAB2498):
    """
    Return the risk group (1, 2 or 3) of a single patient.

    The arguments are the patient's values. They are placed, in signature
    order, into a single-row DataFrame whose column labels are read from
    data/Features_ExtraST_model.csv; a constant ``missBIO2 = 0`` is inserted
    right after ``T0_date``.

    The model is read from data/ExtraST_model.zip and the thresholds from the
    first row of data/thresholds.csv (columns 'Threshold1-2', 'Threshold2-3'
    and 'Normalization_max'). The normalized risk is
    ``log(predict_risk) / Normalization_max`` and the group is:
        1  if risk <  Threshold1-2
        2  if Threshold1-2 <= risk < Threshold2-3
        3  otherwise

    @returns:
    - risk_group: int, one of 1, 2, 3.
    """

    ############# Preprocess data according to the model #############

    # Load the model
    estimator_loaded = load_model('data/ExtraST_model.zip')

    # Load the features used by the model (second column of the CSV)
    features = pd.read_csv('data/Features_ExtraST_model.csv').iloc[:, 1]

    # Not exposed as an argument; always supplied as 0.
    missBIO2 = 0

    test_df = pd.DataFrame([[
        cutaneous_biopsy_ulceration, scenario,
        cutaneous_biopsy_histological_subtype, cutaneous_biopsy_breslow,
        total_count_slnb_ldn, visceral_metastasis_location,
        total_positives_slnb_ldn, patient_hair_color,
        cutaneous_biopsy_lymphatic_invasion, patient_eye_color,
        cutaneous_biopsy_mitotic_index, age, patient_phototype,
        cutaneous_biopsy_satellitosis, MC1R,
        cutaneous_biopsy_vascular_invasion, cutaneous_biopsy_regression,
        LAB2419, T0_date, missBIO2, LAB2406, LAB1307, patient_gender, LAB2469,
        LAB2544, neutrofils_per_limfocits, cutaneous_biopsy_neurotropism,
        LAB2467, LAB1309, primary_tumour_location_coded, LAB2476, LAB2679,
        LAB2404, cutaneous_biopsy_predominant_cell_type, LAB2407, LAB1301,
        LAB2498
    ]],
                           columns=features)

    # Missing values are encoded as 0
    test_df = test_df.fillna(0)

    # Select only the features used by the classifier
    test_df = test_df[features.values]

    # Target encoding + normalization
    test_df = preprocess_newdata(test_df)

    ############# Identify risk group #############

    # Read risk encoding information
    thr_info = pd.read_csv('data/thresholds.csv')
    thr_12 = thr_info['Threshold1-2'][0]
    thr_23 = thr_info['Threshold2-3'][0]
    norm_info = thr_info['Normalization_max'][0]

    # Risk of the (single) patient
    risk = estimator_loaded.predict_risk(test_df)

    # Normalize the risk
    risk = np.log(risk) / norm_info

    # A risk exactly equal to thr_12 used to fall through to group 3 because
    # both strict comparisons failed; it now belongs to group 2.
    if risk < thr_12:
        risk_group = 1
    elif risk < thr_23:
        risk_group = 2
    else:
        risk_group = 3

    return risk_group
import numpy as np
import pandas as pd
from pysurvival.utils import load_model
import matplotlib.pyplot as plt

# Read the prediction table produced earlier (finalPredictionData.csv).
predictionData = pd.read_csv('./finalPredictionData.csv')

# Keep only the model inputs: the two outcome columns are dropped.
onlyPredictionData = predictionData.drop(
    columns=['AliveStatus0Dead1Alive', 'NumDays'],
)

# Restore the trained survival model from disk.
survivalModel = load_model('./survival_model.zip')

# Predict the survival function for every row, then transpose the result
# before exporting it to Excel and plotting it.
preds = survivalModel.predict_survival(onlyPredictionData)
preds_df = pd.DataFrame(preds).transpose()
preds_df.to_excel('preds.xlsx')

plt.plot(preds_df, label="Survival Data")
plt.legend()
plt.show()
Exemplo n.º 6
0
import dash
import dash_core_components as dcc
import dash_html_components as html
import dash_bootstrap_components as dbc
import plotly.express as px
from pysurvival.models.semi_parametric import NonLinearCoxPHModel
import pandas as pd
from dash.dependencies import Input, Output, State
from pysurvival.utils import load_model
from toolbox import *
from dash_table import DataTable

# Load the four serialized models once at import time; the archives are
# read in the order PFS, RFS, MMC-PFS, MMC-RFS.
pfsMod, rfsMod, pfsModMMC, rfsModMMC = [
    load_model(archive)
    for archive in (
        'modelData/final_pfs.zip',
        'modelData/final_rfs.zip',
        'modelData/MMCPFS.zip',
        'modelData/MMCRFS.zip',
    )
]

# Stylesheets handed to Dash: a hosted CSS file plus the bootstrap grid theme.
external_stylesheets = [
    'https://codepen.io/chriddyp/pen/bWLwgP.css',
    dbc.themes.GRID,
]

# The Dash application object used by the rest of the module.
app = dash.Dash(
    __name__,
    external_stylesheets=external_stylesheets,
)

#app.css.config.serve_locally = True
#app.scripts.config.serve_locally = True

app.layout = html.P(
    id='page_content',
    className='app_body',
def retrieving_MaxMin_riskGroup(risk_group, data_path = 'data/train.csv'):
    """
    Retrieve the survival curves of the patients with maximum and minimum
    risk inside the requested risk group.

    The model is read from data/ExtraST_model.zip, the feature list from
    data/Features_ExtraST_model.csv (the 'missBIO2' entry is excluded) and the
    thresholds from the first row of data/thresholds.csv. The normalized risk
    is ``log(predict_risk) / Normalization_max`` and the groups are:
        1  if risk <  Threshold1-2
        2  if Threshold1-2 <= risk < Threshold2-3
        3  if risk >= Threshold2-3
    so that a patient sitting exactly on a threshold belongs to a group.

    @args:
    - risk_group: 1, 2 or 3. Any other value is treated as group 3.
    - data_path: CSV file with the patients to search.
    @returns:
    - times: ``np.arange`` over the length of the maximum-risk curve (domain
      of the curves).
    - survival_curve_min: curve of the minimum-risk patient of the group.
    - survival_curve_max: curve of the maximum-risk patient of the group.
    @raises:
    - ValueError: if no patient of the dataset falls in the requested group.
    """

    ############# Preprocess data according to the model #############

    # Load the model
    estimator_loaded = load_model('data/ExtraST_model.zip')

    # Load the features used by the model, without the 'missBIO2' entry.
    # Boolean filtering avoids an in-place drop on a derived Series.
    features = pd.read_csv('data/Features_ExtraST_model.csv').iloc[:, 1]
    features = features[features != 'missBIO2']

    # Read the dataset, keep only the model's features and encode missing
    # values as 0 (non-inplace, so no write goes to a column slice).
    test_df = pd.read_csv(data_path)
    test_df = test_df[features].fillna(0)

    # Target encoding + normalization
    test_df = preprocess_newdata(test_df)

    # Read risk encoding information
    thr_info = pd.read_csv('data/thresholds.csv')
    thr_12 = thr_info['Threshold1-2'][0]
    thr_23 = thr_info['Threshold2-3'][0]
    norm_info = thr_info['Normalization_max'][0]

    # Normalized log-risk of every patient
    risk = estimator_loaded.predict_risk(test_df)
    risk = np.log(risk) / norm_info

    # Select the patients of the requested group
    if risk_group == 1:
        in_group = risk < thr_12
    elif risk_group == 2:
        in_group = (thr_12 <= risk) & (risk < thr_23)
    else:
        in_group = risk >= thr_23
    patient_index = np.where(in_group)[0]

    # argmax/argmin on an empty selection would fail with an opaque message.
    if patient_index.size == 0:
        raise ValueError(
            'No patient in %r belongs to risk group %r' % (data_path, risk_group))

    # Positions (in the dataset) of the extreme patients of the group
    group_risk = risk[patient_index]
    maximum_patient = patient_index[np.argmax(group_risk)]
    minimum_patient = patient_index[np.argmin(group_risk)]

    # NOTE(review): a single row (1-D) is handed to predict_survival, as in
    # the original code -- confirm the model accepts that shape.
    survival_curve_min = estimator_loaded.predict_survival(test_df.iloc[minimum_patient]).flatten()
    survival_curve_max = estimator_loaded.predict_survival(test_df.iloc[maximum_patient]).flatten()
    times = np.arange(survival_curve_max.shape[0])

    return times, survival_curve_min, survival_curve_max


    """